﻿{---------------------------------------------------------------------------}
{                                                                           }
{ File:       Velthuis.BigIntegers.pas                                      }
{ Function:   A big integer implementation, with critical parts written in  }
{             Win32 or Win64 assembler, or "Pure Pascal" for other          }
{             platforms, or if explicitly specified.                        }
{ Language:   Delphi version XE2 or later                                   }
{ Author:     Rudy Velthuis                                                 }
{ Copyright:  (c) 2015, Rudy Velthuis                                       }
{ Notes:      See http://praxis-velthuis.de/rdc/programs/bigintegers.html   }
{                                                                           }
{             For tests, see BigIntegerDevelopmentTests.dproj. The data     }
{             for these tests are generated by a C# program, in the         }
{             Tests\CSharp\BigIntegerTestGenerator subdirectory.            }
{                                                                           }
{ Credits:                                                                  }
{             Thanks to Peter Cordes, Nils Pipenbrinck and Johan for their  }
{             help on StackOverflow:                                        }
{             - http://stackoverflow.com/a/32298732/95954                   }
{             - http://stackoverflow.com/a/32087095/95954                   }
{             - http://stackoverflow.com/a/32084357/95954                   }
{                                                                           }
{             Thanks to Agner Fog for his excellent optimization guides.    }
{                                                                           }
{ Literature:                                                               }
{             1. Donald Knuth, "The Art Of Computer Programming", 2nd ed.   }
{                Vol I-III.                                                 }
{             2. Karl Hasselström,                                          }
{                "Fast Division of Large Integers - A Comparison of         }
{                 Algorithms"                                               }
{                 bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/                  }
{                 Lectures/Hasselstrom2003.pdf                              }
{             3. Richard P. Brent and Paul Zimmermann,                      }
{                "Modern Computer Arithmetic"                               }
{                http://arxiv.org/pdf/1004.4710v1.pdf                       }
{             4. Christoph Burnikel, Joachim Ziegler                        }
{                "Fast Recursive Division"                                  }
{                cr.yp.to/bib/1998/burnikel.ps                              }
{             5. Hacker's Delight, e.g.                                     }
{                http://www.hackersdelight.org/basics2.pdf                  }
{             as well as many Wikipedia articles                            }
{                                                                           }
{ ------------------------------------------------------------------------- }
{                                                                           }
{ License:    Redistribution and use in source and binary forms, with or    }
{             without modification, are permitted provided that the         }
{             following conditions are met:                                 }
{                                                                           }
{             * Redistributions of source code must retain the above        }
{               copyright notice, this list of conditions and the following }
{               disclaimer.                                                 }
{             * Redistributions in binary form must reproduce the above     }
{               copyright notice, this list of conditions and the following }
{               disclaimer in the documentation and/or other materials      }
{               provided with the distribution.                             }
{                                                                           }
{ Disclaimer: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS"     }
{             AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT     }
{             LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND     }
{             FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO        }
{             EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE     }
{             FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,     }
{             OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,      }
{             PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     }
{             DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    }
{             AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   }
{             LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)        }
{             ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF   }
{             ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                    }
{                                                                           }
{---------------------------------------------------------------------------}

{---------------------------------------------------------------------------}
{ Corrections:                                                              }
{             2015.11.28: changed calls to System.@GetMem in                }
{             System.AllocMem. GetMem does not clear memory, causing the    }
{             occasional bad result.                                        }
{---------------------------------------------------------------------------}

unit Velthuis.BigIntegers;

interface

uses
  Velthuis.RandomNumbers, Types, System.SysUtils, Math;

// --- User settings ---

{$UNDEF DEBUG}

// Setting PUREPASCAL forces the use of plain Object Pascal for all routines, i.e. no assembler is used.

  {$DEFINE PUREPASCAL}


// Setting RESETSIZE forces the Compact routine to shrink the dynamic array when that makes sense.
// This can slow down code a little.

  { $DEFINE RESETSIZE}


// Experimental is set for code that tries something new without deleting the original code yet. Undefine it to get the original code.

  {$DEFINE Experimental}


// --- Permanent settings ---

// For Delphi XE3 and up:
{$IF CompilerVersion >= 24.0 }
  {$LEGACYIFEND ON}
{$IFEND}

// For Delphi XE and up:
{$IF CompilerVersion >= 22.0}
  {$CODEALIGN 16}
  {$ALIGN 16}
{$IFEND}

// Assembler is only supplied for Windows targets.
// For other targets, PUREPASCAL must be defined.

{$IFNDEF PUREPASCAL}
  {$IF NOT DEFINED(MSWINDOWS)}
    {$DEFINE PUREPASCAL}
  {$IFEND}
{$ENDIF}

const
{$IFDEF PUREPASCAL}
  PurePascal = True;
{$ELSE}
  PurePascal = False;
{$ENDIF}

  // This assumes an unroll factor of 4. Unrolling more (e.g. 8) does not improve performance anymore.
  // That was tested and removed again.
  CUnrollShift     = 2;
  CUnrollIncrement = 1 shl CUnrollShift;
  CunrollMask      = CUnrollIncrement - 1;

type
  BigIntegerException = Exception;

  TNumberBase = 2..36;

  PLimb = ^TLimb;                               // Knuth calls them "limbs".
  TLimb = type UInt32;
  TMagnitude = array of TLimb;                   // These BigIntegers use sign-magnitude format, hence the name.

  // TBigInteger uses a sign-magnitude representation, i.e. the magnitude is always interpreted as an unsigned big integer,
  // while the sign bit represents the sign. Currently, the sign bit is stored as the top bit of the FSize member.

  PBigInteger = ^BigInteger;
  BigInteger = record
  public
  {$REGION 'public constants, types and variables'}
    type
      /// <summary>TRoundingMode governs which rounding mode is used to convert from Double to BigInteger.</summary>
      /// <param name="rmTruncate">Truncates any fraction</param>
      /// <param name="rmSchool">Rounds any fraction >= 0.5 away from zero</param>
      /// <param name="rmRound">Rounds any fraction > 0.5 away from zero</param>
      TRoundingMode = (rmTruncate, rmSchool, rmRound);

    class var
      MinusOne: BigInteger;
      Zero: BigInteger;
      One: BigInteger;
      Ten: BigInteger;

    const
      CapacityMask = High(Integer) - 3;         // Mask ensuring that FData lengths are a multiple of 4, e.g. $7FFFFFFC
      SizeMask     = High(Integer);             // Mask to extract size part of FSize member, e.g. $7FFFFFFF
      SignMask     = Low(Integer);              // Mask to extract sign bit of FSize member, e.g. $80000000

      // TODO: check the values of these constants

  {$IFDEF PUREPASCAL}
    {$IFDEF CPUX64}                                             // 64PP = 64 bit, Pure Pascal
      KaratsubaThreshold             =   96;    // Checked
      ToomCook3Threshold             =  272;    // Checked
      BurnikelZieglerThreshold       =   91;    // Checked
      BurnikelZieglerOffsetThreshold =    5;    // Unchecked
      KaratsubaSqrThreshold          =   48;    // Unchecked
    {$ELSE CPUX86}                                              // 32PP = 32 bit, Pure Pascal
      KaratsubaThreshold             =   56;    // Checked
      ToomCook3Threshold             =  144;    // Checked
      BurnikelZieglerThreshold       =   91;    // Checked
      BurnikelZieglerOffsetThreshold =    5;    // Unchecked
      KaratsubaSqrThreshold          =   48;    // Unchecked
    {$ENDIF CPUX64}
  {$ELSE !PUREPASCAL}
    {$IFDEF CPUX64}                                             // 64A  = 64 bit, Assembler
      KaratsubaThreshold             =  256;    // Checked
      ToomCook3Threshold             =  768;    // Checked
      BurnikelZieglerThreshold       =  160;    // Checked
      BurnikelZieglerOffsetThreshold =   80;    // Unchecked
      KaratsubaSqrThreshold          =  256;    // Unchecked
    {$ELSE CPUX86}                                              // 32A  = 32 bit, Assembler
      KaratsubaThreshold             =   96;    // Checked
      ToomCook3Threshold             =  256;    // Checked
      BurnikelZieglerThreshold       =   80;    // Checked
      BurnikelZieglerOffsetThreshold =   40;    // Unchecked
      KaratsubaSqrThreshold          =  128;    // Unchecked
    {$ENDIF CPUX64}
  {$ENDIF PUREPASCAL}

      ToomCook3SqrThreshold          =  216;    // Unchecked
  {$ENDREGION}

  {$REGION 'public methods'}

    // -- Constructors --

    /// <summary>Initializes class variables before first use.</summary>
    class constructor Initialize;

    /// <summary>Creates a new BigInteger from the data in limbs and the sign specified in Negative.</summary>
    /// <param name="Limbs">data for the magnitude of the BigInteger. The data is interpreted as unsigned, and comes low limb first.</param>
    /// <param name="Negative">Indicates if the BigInteger is negative.</param>
    constructor Create(const Limbs: array of TLimb; Negative: Boolean); overload;

    /// <summary>Creates a new BigInteger from the data in limbs and the sign specified in Negative.</summary>
    /// <param name="Data">data for the magnitude of the BigInteger. The data is interpreted as unsigned, and comes low limb first.</param>
    /// <param name="Negative">Indicates if the BigInteger is negative.</param>
    constructor Create(const Data: TMagnitude; Negative: Boolean); overload;

    /// <summary>Creates a new BigInteger with the same value as the specified BigInteger.</summary>
    constructor Create(const Int: BigInteger); overload;

    /// <summary>Creates a new BigInteger with the value of the specified Integer.<summary>
    constructor Create(const Int: Int32); overload;

    /// <summary>Creates a new BigInteger with the value of the specified Cardinal.<summary>
    constructor Create(const Int: UInt32); overload;

    /// <summary>Creates a new BigInteger with the value of the specified 64 bit integer.<summary>
    constructor Create(const Int: Int64); overload;

    /// <summary>Creates a new BigInteger with the value of the specified Integer.<summary>
    constructor Create(const Int: UInt64); overload;

    /// <summary>Creates a new BigInteger with the integer value of the specified Double.</summary>
    constructor Create(const ADouble: Double); overload;

    /// <summary>Creates a new BigInteger from the value in the byte array.
    /// The byte array is considered to be in two's complement.</summary>
    /// <remarks>This is the complementary function of ToByteArray</remarks>
    constructor Create(const Bytes: array of Byte); overload;

    /// <summary>Creates a new random BigInteger of the given size. Uses the given IRandom to
    ///   generate the random value.</summary>
    constructor Create(NumBits: Integer; const Random: IRandom); overload;


    // -- Global numeric base related functions --

    /// <summary>Sets the global numeric base for big integers to 10.</summary>
    /// <remarks>The global numeric base is used for input or output if there is no override in the input string or
    ///   the output function.</remarks>
    class procedure Decimal; static;

    /// <summary>Sets the global numeric base for big integers to 16.</summary>
    /// <remarks>The global numeric base is used for input or output if there is no override in the input string or
    ///   the output function.</remarks>
    class procedure Hexadecimal; static;

    /// <summary>Sets the global numeric base for big integers to 16.</summary>
    /// <remarks>The global numeric base is used for input or output if there is no override in the input string or
    ///   the output function.</remarks>
    class procedure Hex; static;

    /// <summary>Sets the global numeric base for big integers to 2.</summary>
    /// <remarks>The global numeric base is used for input or output if there is no override in the input string or
    ///   the output function.</remarks>
    class procedure Binary; static;

    /// <summary>Sets the global numeric base for big integers to 8.</summary>
    /// <remarks>The global numeric base is used for input or output if there is no override in the input string or
    ///   the output function.</remarks>
    class procedure Octal; static;


    // -- String input functions --

    /// <summary>Tries to parse the specified string into a valid BigInteger value in the specified numeric base. Returns False if this failed.</summary>
    /// <param name="S">The string that represents a big integer value in the specified numeric base.</param>
    /// <param name="Base">The numeric base that is assumed when parsing the string. Valid values are 2..36.</param>
    /// <param name="Res">The resulting BigInteger, if the parsing succeeds. Res is undefined if the parsing fails.</param>
    /// <returns>Returns True if S could be parsed into a valid BigInteger in Res. Returns False on failure.</returns>
    class function TryParse(const S: string; Base: TNumberBase; out Res: BigInteger): Boolean; overload; static;

    // -------------------------------------------------------------------------------------------------------------//
    // Note: most of the parse format for BigIntegers was taken from or inspired by Common Lisp (e.g. '%nnR' or     //
    // '_'), some was inspired by other languages, including Delphi (e.g. the '$ 'for hex values), some was         //
    // something I prefer (e.g. '0k' additional to '0o' for octal format). It should be usable in Delphi as well    //
    // as in C++Builder, as it contains the default formats for integer values in these languages too.              //
    // -- Rudy Velthuis.                                                                                            //
    //--------------------------------------------------------------------------------------------------------------//

    /// <summary>Tries to parse the specified string into a valid BigInteger value in the default BigInteger numeric base.</summary>
    /// <param name="S">The string that represents a big integer value in the default numeric base, unless
    ///   specified otherwise. See <see cref="BigInteger.Base" /></param>
    /// <param name="Res">The resulting BigInteger, if the parsing succeeds. Res is undefined if the parsing fails.</param>
    /// <returns>Returns True if S could be parsed into a valid BigInteger in Res. Returns False on failure.</returns>
    /// <remarks>
    ///   <para>To make it easier to increase the legibility of large numbers, any '_' in the numeric string will completely be ignored, so
    ///     '1_000_000_000' is exactly equivalent to '1000000000'.</para>
    ///   <para>The string to be parsed is considered case insensitive, so '$ABC' and '$abc' represent exactly the same value.</para>
    ///   <para>The format of a string to be parsed is as follows:</para>
    ///   <para><c>[sign][base override]digits</c></para>
    ///   <para>
    ///     <param name="sign">This can either be '-' or '+'. It will make the BigInteger negative or positive, respectively.
    ///     If no sign is specified, a positive BigInteger is generated.</param>
    ///     <param name="base override">There are several ways to override the default numeric base.<para>Specifying '0x' or '$' here will cause the
    ///     string to be interpreted as representing a hexadecimal (base 16) value.</para><para>Specifying '0b' will cause it to be interpreted as
    ///     binary (base 2).</para><para>Specifying '0d' will cause it to be interpreted as
    ///     decimal (base 10).</para>
    ///     <para>Specifying '0o' or '0k' will cause it to be interpreted as octal (base 8).</para><para>Finally, to specify any base,
    ///     using an override in the format '%nnR' (R for radix) will cause the number to be interpreted to be in base 'nn', where 'nn' represent one or two
    ///     decimal digits. So '%36rRudyVelthuis' is a valid BigInteger value with base 36.</para></param>
    ///   </para>
    /// </remarks>
    class function TryParse(const S: string; out Res: BigInteger; aBase : Integer): Boolean; overload; static;

    /// <summary>Parses the specified string into a BigInteger, using the default numeric base.</summary>
    class function Parse(const S: string; aBase : Integer): BigInteger; static;


    // -- Sign related functions --

    /// <summary>Returns True if the BigInteger is zero.</summary>
    function IsZero: Boolean; inline;

    /// <summary>Returns True if the BigInteger is negative (&lt; 0).</summary>
    function IsNegative: Boolean; inline;

    /// <summary>Returns True if the BigInteger is positive (&gt; 0).</summary>
    function IsPositive: Boolean; inline;

    /// <summary>Returns True if the BigInteger is even (0 is considered even too).</summary>
    function IsEven: Boolean; inline;

    /// <summary>Returns True if the magnitude of the BigInteger value is exactly a power of two.</summary>
    function IsPowerOfTwo: Boolean;

    /// <summary>Returns True if the BigInteger represents a value of 1.</summary>
    function IsOne: Boolean;

    // Bit fiddling

    // TODO: Should have two's complement semantics.
    function TestBit(Index: Integer): Boolean;
    function SetBit(Index: Integer): BigInteger;
    function ClearBit(Index: Integer): BigInteger;
    function FlipBit(Index: Integer): BigInteger;

    // String output functions

    /// <summary>Returns the string interpretation of the specified BigInteger in the default numeric base, see <see cref="BigInteger.Base" />.</summary>
    function ToString: string; overload;

    /// <summary>Returns the string interpretation of the specified BigInteger in the specified numeric base.</summary>
    function ToString(Base: Integer): string; overload;

    /// <summary>Returns the string interpretation of the specified BigInteger in numeric base 10. Equivalent
    ///   to ToString(10).</summary>
    function ToDecimalString: string;

    /// <summary>Returns the string interpretation of the specified BigInteger in numeric base 16. Equivalent
    ///   to ToString(16).</summary>
    function ToHexString: string;

    /// <summary>Returns the string interpretation of the specified BigInteger in numeric base 2. Equivalent
    ///   to ToString(2).</summary>
    function ToBinaryString: string;

    /// <summary>Returns the string interpretation of the specified BigInteger in numeric base 8. Equivalent
    ///   to ToString(8).</summary>
    function ToOctalString: string;

    procedure FromString(const Value: string; aBase : Integer);


    // -- Arithmetic operators --

    /// <summary>Adds two BigIntegers.</summary>
    class operator Add(const Left, Right: BigInteger): BigInteger;

    /// <summary>Subtracts the second BigInteger from the first.</summary>
    class operator Subtract(const Left, Right: BigInteger): BigInteger;

    /// <summary>Multiplies two BigIntegers.</summary>
    class operator Multiply(const Left, Right: BigInteger): BigInteger;

    /// <summary>Multiplies the specified BigInteger with the specified Word value.</summary>
    class operator Multiply(const Left: BigInteger; Right: Word): BigInteger; inline;

    /// <summary>multiplies the specified Wirdvalue with the specified BigInteger.</summary>
    class operator Multiply(Left: Word; const Right: BigInteger): BigInteger; inline;

    /// <summary>Performs an integer divide of the first BigInteger by the second.
    class operator IntDivide(const Left, Right: BigInteger): BigInteger;

    /// <summary>Performs an integer divide of the first BigInteger by the second.
    class operator IntDivide(const Left: BigInteger; Right: UInt16): BigInteger;

    /// <summary>Performs an integer divide of the first BigInteger by the second.
    class operator IntDivide(const Left: BigInteger; Right: UInt32): BigInteger;

    /// <summary>Returns the remainder of an integer divide of the first BigInteger by the second.</summary>
    class operator Modulus(const Left, Right: BigInteger): BigInteger;

    /// <summary>Returns the remainder of an integer divide of the first BigInteger by the second.</summary>
    class operator Modulus(const Left: BigInteger; Right: UInt32): BigInteger;

    /// <summary>Returns the remainder of an integer divide of the first BigInteger by the second.</summary>
    class operator Modulus(const Left: BigInteger; Right: UInt16): BigInteger;

    /// <summary>Unary minus. Negates the value of the specified BigInteger.</summary>
    class operator Negative(const Int: BigInteger): BigInteger;

    /// <summary>Increment. Adds 1 to the value of the specified BigInteger very fast.</summary>
    class operator Inc(const Int: BigInteger): BigInteger;

    /// <summary>Decrement. Subtracts 1 from the value of the specified BigInteger very fast.</summary>
    class operator Dec(const Int: BigInteger): BigInteger;


    // -- Logical and bitwise operators --

    /// <summary>Returns the result of the bitwise AND operation on its BigInteger operands. The result
    /// has two's complement semantics, e.g. '-1 and 7' returns '7'.</summary>
    class operator BitwiseAnd(const Left, Right: BigInteger): BigInteger;

    /// <summary>Returns the result of the bitwise OR operation on its BigInteger operands. The result
    /// has two's complement semantics, e.g. '-1 or 7' returns '-1'.</summary>
    class operator BitwiseOr(const Left, Right: BigInteger): BigInteger;

    /// <summary>Returns the result of the bitwise XOR operation on its BigIntegers operands. The result
    /// has two's complement semantics, e.g. '-1 xor 7' returns '-8'.</summary>
    class operator BitwiseXor(const Left, Right: BigInteger): BigInteger;

    /// <summary>Returns the result of the bitwise NOT operation on its BigInteger operand. The result
    /// has two's complement semantics, e.g. 'not 1' returns '-2'.</summary>
    class operator LogicalNot(const Int: BigInteger): BigInteger;


    // -- Shift operators --

    /// <summary>Shifts the specified BigInteger value the specified number of bits to the left (away from 0).
    ///   The size of the BigInteger is adjusted accordingly.</summary>
    /// <remarks>Note that this is an arithmetic shift, i.e. the sign is preserved. This is unlike normal
    ///   integer shifts in Delphi.</remarks>
    class operator LeftShift(const Value: BigInteger; Shift: Integer): BigInteger;

    /// <summary>Shifts the specified BigInteger value the specified number of bits to the right (toward 0).
    ///   The size of the BigInteger is adjusted accordingly.</summary>
    /// <remarks>Note that this is an arithmetic shift, i.e. the sign is preserved. This is unlike normal
    ///   integer shifts in Delphi. This means that negative values do not finally end up as 0, but
    ///   as -1, since the sign bit is always shifted in.</remarks>
    class operator RightShift(const Value: BigInteger; Shift: Integer): BigInteger;


    // -- Comparison operators --

    /// <summary>Returns True if the specified BigIntegers have the same value.</summary>
    class operator Equal(const Left, Right: BigInteger): Boolean;

    /// <summary>Returns True if the specified BigInteger do not have the same value.</summary>
    class operator NotEqual(const Left, Right: BigInteger): Boolean;

    /// <summary>Returns true if the value of Left is mathematically greater than the value of Right.</summary>
    class operator GreaterThan(const Left, Right: BigInteger): Boolean;

    /// <summary>Returns true if the value of Left is mathematically greater than or equal to the value of Right.</summary>
    class operator GreaterThanOrEqual(const Left, Right: BigInteger): Boolean;

    /// <summary>Returns true if the value of Left is mathematically less than the value of Right.</summary>
    class operator LessThan(const Left, Right: BigInteger): Boolean;

    /// <summary>Returns true if the value of Left is mathematically less than or equal to the value of Right.</summary>
    class operator LessThanOrEqual(const Left, Right: BigInteger): Boolean;


    // -- Implicit conversion operators --

    /// <summary>Implicitly (i.e. without a cast) converts the specified Integer to a BigInteger.</summary>
    class operator Implicit(const Int: Integer): BigInteger;

    /// <summary>Implicitly (i.e. without a cast) converts the specified Cardinal to a BigInteger.</summary>
    class operator Implicit(const Int: Cardinal): BigInteger;

    /// <summary>Implicitly (i.e. without a cast) converts the specified Int64 to a BigInteger.</summary>
    class operator Implicit(const Int: Int64): BigInteger;

    /// <summary>Implicitly (i.e. without a cast) converts the specified UInt64 to a BigInteger.</summary>
    class operator Implicit(const Int: UInt64): BigInteger;

    /// <summary>Implicitly (i.e. without a cast) converts the specified string to a BigInteger. The BigInteger is the
    ///   result of a call to Parse(Value).</summary>
    class operator Implicit(const Value: string): BigInteger;


    // -- Explicit conversion operators --

    /// <summary>Explicitly (i.e. with a cast) converts the specified BigInteger to an Integer. If necessary, the
    ///   value of the BigInteger is truncated or sign-extended to fit in the result.</summary>
    class operator Explicit(const Int: BigInteger): Integer;

    /// <summary>Explicitly (i.e. with a cast) converts the specified BigInteger to a Cardinal. If necessary, the
    ///   value of the BigInteger is truncated to fit in the result.</summary>
    class operator Explicit(const Int: BigInteger): Cardinal;

    /// <summary>Explicitly (i.e. with a cast) converts the specified BigInteger to an Int64. If necessary, the
    ///   value of the BigInteger is truncated or sign-extended to fit in the result.</summary>
    class operator Explicit(const Int: BigInteger): Int64;

    /// <summary>Explicitly (i.e. with a cast) converts the specified BigInteger to an UInt64. If necessary, the
    ///   value of the BigInteger is truncated to fit in the result.</summary>
    class operator Explicit(const Int: BigInteger): UInt64;

    /// <summary>Explicitly (i.e. with a cast) converts the specified BigInteger to a Double.</summary>
    class operator Explicit(const Int: BigInteger): Double;

    /// <summary>Explicitly (i.e. with a cast) converts the specified Double to a BigInteger.</summary>
    class operator Explicit(const ADouble: Double): BigInteger;


    // -- Conversion functions --

    /// <summary>Converts the specified BigInteger to a Double, if this is possible. Returns an exception if the
    ///   value of the BigInteger is too large.</summary>
    function AsDouble: Double;

    /// <summary>Converts the specified BigInteger to an Integer, if this is possible. Returns an exception if the
    ///   value of the BigInteger is too large.</summary>
    function AsInteger: Integer;

    /// <summary>Converts the specified BigInteger to a Cardinal, if this is possible. Returns an exception if the
    ///   value of the BigInteger is too large or is negative.</summary>
    function AsCardinal: Cardinal;

    /// <summary>Converts the specified BigInteger to an Int64, if this is possible. Returns an exception if the
    ///   value of the BigInteger is too large.</summary>
    function AsInt64: Int64;

    /// <summary>Converts the specified BigInteger to a UInt64, if this is possible. Returns an exception if the
    ///   value of the BigInteger is too large or is negative.</summary>
    function AsUInt64: UInt64;


    // -- Operators as functions --

    /// <summary>The function equivalent to the operator '+'.</summary>
    class function Add(const Left, Right: BigInteger): BigInteger; overload; static;

    /// <summary>The function equivalent to the operator '-'.</summary>
    class function Subtract(const Left, Right: BigInteger): BigInteger; overload; static;

    /// <summary>The function equivalent to the operator '*'.</summary>
    class function Multiply(const Left, Right: BigInteger): BigInteger; overload; static;
    class function MultiplyThreshold(const Left, Right: BigInteger; Threshold: Integer): BigInteger; static;
    class function MultiplyBaseCase(const Left, Right: BigInteger): BigInteger; static;

    /// <summary>The function equivalent to the operators 'div' and 'mod'. Since calculation of the quotient
    ///   automatically leaves a remainder, this function allows you to get both for more or less the "price"
    ///   (performance-wise) of one.</summary>
    class procedure DivMod(const Dividend, Divisor: BigInteger; var Quotient, Remainder: BigInteger); static;

    /// <summary>Simple "schoolbook" division according to Knuth, with limb-size digits.</summary>
    class procedure DivModKnuth(const Left, Right: BigInteger; var Quotient, Remainder: BigInteger); static;

    /// <summary>Recursive "schoolbook" division, as described by Burnikel and Ziegler. Faster than
    /// <see cref="DivModBaseCase" />, but with more overhead, so should only be applied for larger BigIntegers.</summary>
    class procedure DivModBurnikelZiegler(const Left, Right: BigInteger; var Quotient, Remainder: BigInteger); static;

    /// <summary>The function equivalent to the operator 'div'.</summary>
    class function Divide(const Left: BigInteger; Right: UInt16): BigInteger; overload; static;

    /// <summary>The function equivalent to the operator 'div'.</summary>
    class function Divide(const Left:BigInteger; Right: UInt32): BigInteger; overload; static;

    /// <summary>The function equivalent to the operator 'div'.</summary>
    class function Divide(const Left, Right: BigInteger): BigInteger; overload; static;

    /// <summary>>The function equivalent to the operator 'mod'. Like for integers, the remainder gets
    ///   the sign - if any - of the dividend (i.e. of Left).</summary>
    class function Remainder(const Left, Right: BigInteger): BigInteger; overload; static;

    /// <summary>>The function equivalent to the operator 'mod'. Like for integers, the remainder gets
    ///   the sign - if any - of the dividend (i.e. of Left).</summary>
    class function Remainder(const Left: BigInteger; Right: UInt32): BigInteger; overload; static;

    /// <summary>>The function equivalent to the operator 'mod'. Like for integers, the remainder gets
    ///   the sign - if any - of the dividend (i.e. of Left).</summary>
    class function Remainder(const Left: BigInteger; Right: UInt16): BigInteger; overload; static;

    class function MultiplyKaratsuba(const Left, Right: BigInteger): BigInteger; static;
    class function MultiplyToomCook3(const Left, Right: BigInteger): BigInteger; static;

    class function SqrKaratsuba(const Value: BigInteger): BigInteger; static;

  {$IFDEF Experimental}
    class function MultiplyKaratsubaThreshold(const Left, Right: BigInteger; Threshold: Integer): BigInteger; static;
    class function MultiplyToomCook3Threshold(const Left, Right: BigInteger; Threshold: Integer): BigInteger; static;
    class function SqrKaratsubaThreshold(const Value: BigInteger; Threshold: Integer): BigInteger; static;
  {$ENDIF Experimental}


    // -- Self-referential operator functions --

    /// <summary>
    ///   <para>The functional equivalent to</para>
    ///   <code>    A := A + Other;</code>
    ///   <para>This can be chained, as the function returns a pointer to itself:</para>
    ///   <code>    A.Add(First).Add(Second);</code></summary>
    /// <remarks><para>This was added in the hope to gain speed by avoiding some allocations.
    ///   This is not so, although a longer chain seems to improve performance, compared to normal addition
    ///   using operators, a bit.</para></remarks>
    function Add(const Other: BigInteger): PBigInteger; overload;

    /// <summary>The functional equivalent to Self := Self + Other;</summary>
    function Subtract(const Other: BigInteger): PBigInteger; overload;

    /// <summary>The functional equivalent to Self := Self div Other;</summary>
    function Divide(const Other: BigInteger): PBigInteger; overload;

    /// <summary>The functional equivalent to Self := Self mod Other;</summary>
    function Remainder(const Other: BigInteger): PBigInteger; overload;

    /// <summar>The functional equivalent to Self := Self * Other;</summary>
    function Multiply(const Other: BigInteger): PBigInteger; overload;


    // -- Math functions --

    /// <summary>Returns the absolute value of the value in the BigInteger.</summary>
    class function Abs(const Int: BigInteger): BigInteger; static;

    /// <summary>Returns the bit length, the minimum number of bits needed to represent the value, excluding
    ///   the sign bit.</summary>
    function BitLength: Integer;

    /// <summary>Returns the number of all bits that are set, assuming two's complement. The sign bit is
    ///  included in the count.</summary>
    function BitCount: Integer;

    /// <summary>Returns a copy of the current BigInteger, with a unique copy of the data.</summary>
    function Clone: BigInteger;

    /// <summary>Returns +1 if the value in Left is greater than the value in Right, 0 if they are equal and
    ///   1 if it is lesser.</summary>
    class function Compare(const Left, Right: BigInteger): TValueSign; static;

    /// <summary>Returns the (positive) greatest common divisor of the specified BigInteger values.</summary>
    class function GreatestCommonDivisor(const Left, Right: BigInteger): BigInteger; static;

    /// <summary>Returns the natural logarithm of the value in Int.</summary>
    class function Ln(const Int: BigInteger): Double; static;

    /// <summary>Returns the logarithm to the specified base of the value in Int.</summary>
    class function Log(const Int: BigInteger; Base: Double): Double; static;

    /// <summary>Returns the logarithm to base 2 of the value in Int.</summary>
    class function Log2(const Int: BigInteger): Double; static;

    /// <summary>Returns the logarithm to base 10 of the value in Int.</summary>
    class function Log10(const Int: BigInteger): Double; static;

    /// <summary>Returns the larger of two specified values.</summary>
    class function Max(const Left, Right: BigInteger): BigInteger; static;

    /// <summary>Returns the smaller of two specified values.</summary>
    class function Min(const Left, Right: BigInteger): BigInteger; static;

    /// <summary>Returns the specified modulus value of the specified value raised to the specified power.</summary>
    class function ModPow(const ABase, AExponent, AModulus: BigInteger): BigInteger; static;

    /// <summary>Returns the specified value raised to the specified power.</summary>
    class function Pow(const ABase: BigInteger; AExponent: Integer): BigInteger; static;

    /// <summary>Returns the nth root R of a BigInteger such that R^Nth <= Radicand <= (R+1)^Nth.</summary>
    class function NthRoot(const Radicand: BigInteger; Nth: Integer): BigInteger; static;

    /// <summary>If R is the nth root of Radicand, returns Radicand - R^Nth.</summary>
    class procedure NthRootRemainder(const Radicand: BigInteger; Nth: Integer; var Root, Remainder: BigInteger); static;

    /// <summary> Returns the square root R of Radicand, such that R^2 < Radicand < (R+1)^2</summary>
    class function Sqrt(const Radicand: BigInteger): BigInteger; static;

    /// <summary>If R is the square root of Radicand, returns Radicand - R^2.</summary>
    class procedure SqrtRemainder(const Radicand: BigInteger; var Root, Remainder: BigInteger); static;

    class function Sqr(const Value: BigInteger): BigInteger; static;


    // -- Utility functions --

    /// <summary>Sets whether partial-flags stall must be avoided with modified routines.</summary>
    /// <remarks>USING THE WRONG SETTING MAY AFFECT THE TIMING OF CERTAIN ROUTINES CONSIDERABLY, SO USE THIS WITH EXTREME CARE!
    ///   The unit is usually able to determine the right settings automatically.</remarks>
    class procedure AvoidPartialFlagsStall(Value: Boolean); static;

    // -- Array function(s) --

    /// <summary>Converts a BigInteger value to a byte array.</summary>
    /// <returns><para>A TArray&lt;Byte&gt;, see remarks.</para></returns>
    /// <remarks>
    ///   <para>The individual bytes in the array returned by this method appear in little-endian order.</para>
    ///   <para>Negative values are written to the array using two's complement representation in the most compact
    ///   form possible. For example, -1 is represented as a single byte whose value is $FF instead of as an array
    ///   with multiple elements, such as $FF, $FF or $FF, $FF, $FF, $FF.</para>
    ///   <para>Because two's complement representation always interprets the highest-order bit of the last byte in
    ///   the array (the byte at position High(Array)) as the sign bit, the method returns a byte array with
    ///   an extra element whose value is zero to disambiguate positive values that could otherwise be interpreted
    ///   as having their sign bits set. For example, the value 120 or $78 is represented as a single-byte array:
    ///   $78. However, 129, or $81, is represented as a two-byte array: $81, $00. Something similar applies to
    ///   negative values: -179 (or -$B3) must be represented as $4D, $FF.</para>
    /// </remarks>
    function ToByteArray: TArray<Byte>;
    function GetAllocated: Integer;
    function GetSize: Integer; inline;
    function Data: PLimb; inline;
    function GetSign: Integer; inline;
    procedure SetSign(Value: Integer); inline;
  {$ENDREGION}

  private
  {$REGION 'private constants, types and variables'}
    type
      TErrorCode = (ecParse, ecDivbyZero, ecConversion, ecInvalidArg, ecOverflow, ecInvalidArgument);
      TClearData = (cdClearData, cdKeepData);
      TDyadicOperator = procedure(Left, Right, Result: PLimb; LSize, RSize: Integer);
    var
      FData: TMagnitude;                        // The limbs of the magnitude, least significant limb at lowest address.
      FSize: Integer;                           // The top bit is the sign of the big integer. Rest is the number of valid limbs of the big integer.
    class var
      FBase: TNumberBase;
      FAvoidStall: Boolean;
      FRoundingMode: TRoundingMode;
      FInternalAdd: TDyadicOperator;
      FInternalSubtract: TDyadicOperator;
  {$ENDREGION}

  {$REGION 'private functions'}
  {$IFNDEF PUREPASCAL}
    class procedure DetectPartialFlagsStall; static;
    class procedure InternalAddModified(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalAddPlain(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalSubtractModified(Larger, Smaller, Result: PLimb; LSize, SSize: Integer); static;
    class procedure InternalSubtractPlain(Larger, Smaller, Result: PLimb; LSize, SSize: Integer); static;
    class procedure InternalDivideBy3(Value, Result: PLimb; ASize: Integer); static;
  {$ELSE}
    class procedure InternalAddPurePascal(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalSubtractPurePascal(Larger, Smaller, Result: PLimb; LSize, SSize: Integer); static;
  {$ENDIF}
    class function InternalCompare(Left, Right: PLimb; LSize, RSize: Integer): TValueSign; static;
    class procedure InternalAnd(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalOr(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalXor(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalAndNot(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
    class procedure InternalNotAnd(Left, Right, Result: PLimb; LSize, RSize: Integer); static; inline;
    class procedure InternalBitwise(const Left, Right: BigInteger; var Result: BigInteger; PlainOp, OppositeOp, InversionOp: TDyadicOperator); static;
    class procedure InternalIncrement(Limbs: PLimb; Size: Integer); static;
    class procedure InternalDecrement(Limbs: PLimb; Size: Integer); static;
    class procedure InternalShiftLeft(Source, Dest: PLimb; Shift, Size: Integer); static;
    class procedure InternalShiftRight(Source, Dest: PLimb; Shift, Size: Integer); static;
    class function InternalDivMod(Dividend, Divisor, Quotient, Remainder: PLimb; LSize, RSize: Integer): Boolean; static;
    class function InternalDivMod32(Dividend: PLimb; Divisor: UInt32; Quotient, Remainder: PLimb; LSize: Integer): Boolean; static;
    class function InternalDivMod16(Dividend: PLimb; Divisor: UInt16; Quotient, Remainder: PLimb; LSize: Integer): Boolean; static;
    class procedure InternalMultiply(Left, Right, Result: PLimb; LSize, RSize: Integer); static;
//    class function InternalCompareMagnitudes(Left, Right: PLimb; LSize, RSize: Integer): TValueSign; static;
    class function InternalDivideByBase(Mag: PLimb; Base: Integer; var Size: Integer): UInt32; static;
    class procedure InternalMultiplyAndAdd(const Multiplicand: TMagnitude; Multiplicator, Addend: Word; const Res: TMagnitude); static;
    class procedure InternalNegate(Source, Dest: PLimb; Size: Integer); static;
    class function DivideBy3Exactly(const A: BigInteger): BigInteger; static;
    class procedure InternalDivModBurnikelZiegler(const Left, Right: BigInteger; var Quotient, Remainder: BigInteger); static;
    class procedure DivThreeHalvesByTwo(const LeftUpperMid, LeftLower, Right, RightUpper, RightLower: BigInteger; N: Integer; var Quotient, Remainder: BigInteger); static;
    class procedure DivTwoDigitsByOne(const Left, Right: BigInteger; N: Integer; var Quotient, Remainder: BigInteger); static;

    procedure AddWithOffset(const Addend: BigInteger; Offset: Integer);
    function Split(BlockSize, BlockCount: Integer): TArray<BigInteger>;

    class procedure SetBase(const Value: TNumberBase); static;
    class procedure Error(ErrorCode: TErrorCode; const ErrorInfo: string = ''); static;

    procedure Compact;
    procedure EnsureSize(RequiredSize: Integer);
    procedure MakeSize(RequiredSize: Integer);
  {$ENDREGION}

  public
  {$REGION 'public properties'}
    property Size: Integer read GetSize;
    property Allocated: Integer read GetAllocated;
    property Negative: Boolean read IsNegative;
    property Sign: Integer read GetSign write SetSign;
    property Magnitude: TMagnitude read FData;

    // -- Global numeric base for BigIntegers --

    class property Base: TNumberBase read FBase write SetBase;
    class property StallAvoided: Boolean read FAvoidStall;
    class property RoundingMode: TRoundingMode read FRoundingMode write FRoundingMode;
  {$ENDREGION}

  end;

function SignBitOf(Int: Integer): Integer; inline;

function Min(const A, B: BigInteger): BigInteger; overload; inline;
function Max(const A, B: BigInteger): BigInteger; overload; inline;

//var
//  DoDebug: Boolean = True;

implementation

// To switch PUREPASCAL for debug purposes. UNDEF PUREPASCAL before the routine and DEFINE PUREPASCAL after the routine, if PP was defined.
{$IFDEF PUREPASCAL}
{$DEFINE PP}
{$ENDIF}

// Copy the following around the routine for which you want to switch off PUREPASCAL

{$UNDEF PUREPASCAL}
// Routine here.
{$IFDEF PP}
{$DEFINE PUREPASCAL}
{$ENDIF}

uses
{$IFDEF DEBUG}
  Winapi.Windows,
{$ENDIF}
  Velthuis.Sizes, Velthuis.Numerics;

{$POINTERMATH ON}

{$IFDEF DEBUG}
function Join(const Delimiter: string; const Values: array of string): string;
var
  I: Integer;
begin
  if Length(Values) > 0 then
  begin
    Result := Values[0];
    for I := 1 to High(Values) do
      Result := Delimiter + Result;
  end;
end;

function DumpPLimb(P: PLimb; Size: Integer): string;
var
  SL: TArray<string>;
  I: Integer;
begin
  Result := '';
  SetLength(SL, Size);
  for I := 0 to Size - 1 do
    SL[I] := Format('%.8x', [P[Size - I - 1]]);
  Result := Result + Join(' ', SL);
end;

procedure Debug(const Msg: string; const Params: array of const); overload;
begin
  if not DoDebug then
    Exit;

  if IsConsole then
    // Write to console.
    Writeln(System.ErrOutput, Format(Msg, Params))
  else
    // Inside the IDE, this will be displayed in the Event Log.
    OutputDebugString(PChar(Format(Msg, Params)));
end;

procedure Debug(const Msg: string); overload;
begin
  Debug(Msg, []);
end;
{$ELSE}
procedure Debug(const Msg: string; const Params: array of const);
begin
end;
{$ENDIF}

const
  CTimingLoops = $40000;

{$IFNDEF PUREPASCAL}
procedure Timing(var T1, T2, T3: UInt64); stdcall;
{$IFDEF WIN32}
asm
        RDTSC
        MOV     ECX,T1
        MOV     DWORD PTR [ECX],EAX
        MOV     DWORD PTR [ECX+4],EDX
        XOR     EAX,EAX
        MOV     EDX,CTimingLoops

@ADCLoop:

        ADC     EAX,[ECX]       // Partial-flags stall on some "older" processors causes a measurable timing difference.
        DEC     EDX             // DEC only changes one flag, not entire flags register, causing a stall when ADC reads flag register.
        JNE     @ADCLoop

        RDTSC
        MOV     ECX,T2
        MOV     [ECX],EAX
        MOV     [ECX+4],EDX
        XOR     EAX,EAX
        MOV     EDX,CTimingLoops
        NOP

@ADDLoop:

        ADD     EAX,[ECX]       // ADD does not read carry flag, so no partial-flags stall.
        DEC     EDX
        JNE     @ADDLoop

        RDTSC
        MOV     ECX,T3
        MOV     [ECX],EAX
        MOV     [ECX+4],EDX
end;
{$ELSE}
asm
        MOV     R9,RDX
        RDTSC
        MOV     [RCX],EAX
        MOV     [RCX+4],EDX
        XOR     EAX,EAX
        MOV     EDX,CTimingLoops

@ADCLoop:

        ADC     EAX,[RCX]
        DEC     EDX
        JNE     @ADCLoop

        RDTSC
        MOV     [R9],EAX
        MOV     [R9+4],EDX
        XOR     EAX,EAX
        MOV     EDX,CTimingLoops
        NOP

@ADDLoop:

        ADD     EAX,[RCX]
        DEC     EDX
        JNE     @ADDLoop

        RDTSC
        MOV     [R8],EAX
        MOV     [R8+4],EDX
end;
{$ENDIF}

class procedure BigInteger.DetectPartialFlagsStall;
var
  T1, T2, T3: UInt64;
  I1, I2: UInt64;
begin
  Randomize;
  repeat
    Timing(T1, T2, T3);
    I1 := T2 - T1;
    I2 := T3 - T2;
    Debug('Timing: %d / %d = %.2f', [I1, I2, I1 / I2]);

    // Make sure timings are far enough apart. Repeat if in "grey area" inbetween.
    if I1 / I2 > 4.0 then
    begin
      AvoidPartialFlagsStall(True);
      Exit;
    end
    else if I1 / I2 < 2.0 then
    begin
      AvoidPartialFlagsStall(False);
      Exit;
    end;
  until False;
end;
{$ENDIF !PUREPASCAL}

procedure DivMod64(Dividend: UInt64; Divisor: UInt64;
  var Result, Remainder: UInt64); overload;
{$IF DEFINED(CPUX64) and NOT DEFINED(PUREPASCAL_X64)}
asm
        MOV     R10,RDX
        MOV     RAX,RCX
        XOR     EDX,EDX
        DIV     R10
        MOV     [R8],RAX
        MOV     [R9],RDX
end;
{$ELSE}
// Merged from system __lludiv and __llumod
asm
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI
        PUSH    EAX
        PUSH    EDX
//
//       Now the stack looks something like this:
//
//               40[esp]: Dividend(high dword)
//               36[esp]: Dividend(low dword)
//               32[esp]: divisor (high dword)
//               28[esp]: divisor (low dword)
//               24[esp]: return EIP
//               20[esp]: previous EBP
//               16[esp]: previous EBX
//               12[esp]: previous ESI
//                8[esp]: previous EDI
//                4[esp]: previous EAX  Result ptr
//                 [esp]: previous EDX  Remainder ptr
//
        MOV     EBX,28[ESP]             // get the divisor low word
        MOV     ECX,32[ESP]             // get the divisor high word
        MOV     EAX,36[ESP]             // get the dividend low word
        MOV     EDX,40[ESP]             // get the dividend high word

        OR      ECX,ECX
        JNZ     @DivMod64@slow_ldiv     // both high words are zero

        OR      EDX,EDX
        JZ      @DivMod64@quick_ldiv

        OR      EBX,EBX
        JZ      @DivMod64@quick_ldiv    // if ecx:ebx == 0 force a zero divide
          // we don't expect this to actually
          // work
@DivMod64@slow_ldiv:
        MOV     EBP,ECX
        MOV     ECX,64                  // shift counter
        XOR     EDI,EDI                 // fake a 64 bit dividend
        XOR     ESI,ESI                 //

@DivMod64@xloop:
        SHL     EAX,1                   // shift dividend left one bit
        RCL     EDX,1
        RCL     ESI,1
        RCL     EDI,1
        CMP     EDI,EBP                 // dividend larger?
        JB      @DivMod64@nosub
        JA      @DivMod64@subtract
        CMP     ESI,EBX                 // maybe
        JB      @DivMod64@nosub

@DivMod64@subtract:
        SUB     ESI,EBX
        SBB     EDI,EBP                 // subtract the divisor
        INC     EAX                     // build quotient

@DivMod64@nosub:
        LOOP    @DivMod64@xloop
//
//       When done with the loop the four registers values' look like:
//
//       |     edi    |    esi     |    edx     |    eax     |
//       |        remainder        |         quotient        |
//
        JMP     @DivMod64@finish

@DivMod64@quick_ldiv:
        DIV     EBX                     // unsigned divide
        MOV     ESI,EDX
        XOR     EDX,EDX
        XOR     EDI,EDI

@DivMod64@finish:
        POP     EBX
        POP     ECX
        MOV     [EBX],ESI
        MOV     [EBX+4],EDI
        MOV     [ECX],EAX
        MOV     [ECX+4],EDX

        POP     EDI
        POP     ESI
        POP     EBX
end;
{$ifend}
(*
begin
  Result := Dividend div Divisor;
  Remainder := Dividend mod Divisor;
end;
*)

resourcestring
  SErrorBigIntegerParsing = '''%s'' is not a valid big integer value';
  SDivisionByZero         = 'Division by zero';
  SInvalidOperation       = 'Invalid operation';
  SConversionFailed       = 'BigInteger value too large for conversion to %s';
  SOverflow               = 'Double parameter may not be NaN or +/- Infinity';
  SInvalidArgumentBase    = 'Base parameter must be in the range 2..36.';
  SSqrtBigInteger         = 'Negative values not allowed for Sqrt';

{$RANGECHECKS OFF}
{$OVERFLOWCHECKS OFF}
{$POINTERMATH ON}
{$STACKFRAMES OFF}

type
  TUInt64 = record
    Lo, Hi: UInt32;
  end;

const
  // Size of a single limb, used in e.g. asm blocks.
  CLimbSize = SizeOf(TLimb);

  // Double limb, for 64 bit access
  DLimbSize = 2 * CLimbSize;

  // Array mapping a digit in a specified base to its textual representation.
  CBaseChars: array[0..35] of Char = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';
  CNumBase = Ord('0');
  CAlphaBase = Ord('A');

  // Array mapping a specified base to the maximum number of digits required to represent one limb in that base.
  // They map a specified base to Ceil(32 / Log2(base)).
  CStringMaxLengths: array[TNumberBase] of Integer =
  (
    32, 21, 16, 14, 13, 12, 11,
    11, 10, 10,  9,  9,  9,  9,
     8,  8,  8,  8,  8,  8,  8,
     8,  7,  7,  7,  7,  7,  7,
     7,  7,  7,  7,  7,  7,  7
  );

  CStringMinLengths: array[TNumberBase] of Integer =
  (
    32, 20, 16, 13, 12, 11, 10,
    10,  9,  9,  8,  8,  8,  8,
     8,  7,  7,  7,  7,  7,  7,
     7,  6,  6,  6,  6,  6,  6,
     6,  6,  6,  6,  6,  6,  6
  );

  // Various useful sizes and bitcounts.
  CLimbBits     = CByteBits * CLimbSize;
  CLimbWords    = CLimbSize div SizeOf(Word);
  CUInt64Limbs  = SizeOf(UInt64) div CLimbSize;
  CInt64Limbs   = SizeOf(Int64) div CLimbSize;

function IntMax(Left, Right: UInt32): UInt32;
{$IFNDEF PUREPASCAL}
{$IFDEF WIN32}
asm
        CMP    EAX,EDX
        CMOVB  EAX,EDX
end;
{$ELSE WIN64}
asm
        MOV    EAX,ECX
        CMP    EAX,EDX
        CMOVB  EAX,EDX
end;
{$ENDIF}
{$ELSE}
begin
  Result := Left;
  if Left < Right then
    Result := Right;
end;
{$ENDIF}

function IntMin(Left, Right: UInt32): UInt32;
{$IFNDEF PUREPASCAL}
{$IFDEF WIN32}
asm
        CMP    EAX,EDX
        CMOVA  EAX,EDX
end;
{$ELSE WIN64}
asm
        MOV    EAX,ECX
        CMP    EAX,EDX
        CMOVA  EAX,EDX
end;
{$ENDIF}
{$ELSE}
begin
  Result := Left;
  if Left > Right then
    Result := Right;
end;
{$ENDIF}

function ShouldUseBurnikelZiegler(LSize, RSize: Integer): Boolean; inline;
begin
  // http://mail.openjdk.java.net/pipermail/core-libs-dev/2013-November/023493.html
  Result := (RSize >= BigInteger.BurnikelZieglerThreshold) and
            ((LSize - RSize) >= BigInteger.BurnikelZieglerOffsetThreshold);
end;

function SizeBitsOf(Int: Integer): Integer; inline;
begin
  Result := Int and BigInteger.SizeMask;
end;

function SignBitOf(Int: Integer): Integer; inline;
begin
  Result := Int and BigInteger.SignMask;
end;

function Min(const A, B: BigInteger): BigInteger; inline;
begin
  Result := BigInteger.Min(A, B);
end;

function Max(const A, B: BigInteger): BigInteger; inline;
begin
  Result := BigInteger.Max(A, B);
end;

function GreaterSize(Left, Right: Integer): Integer; inline;
begin
  Result := IntMax(SizeBitsOf(Left), SizeBitsOf(Right));
end;

function LesserSize(Left, Right: Integer): Integer; inline;
begin
  Result := IntMin(SizeBitsOf(Left), SizeBitsOf(Right));
end;

function AllocLimbs(Size: Integer): PLimb; inline;
begin
  GetMem(Result, Size * CLimbSize);
end;

procedure CopyLimbs(Src, Dest: PLimb; Count: Integer); inline;
begin
  Move(Src^, Dest^, Count * CLimbSize);
end;

{ TBigInteger }

procedure ShallowCopy(const Value: BigInteger; var Result: BigInteger); inline;
begin
  Result.FSize := Value.FSize;
  Result.FData := Value.FData;
end;

procedure DeepCopy(const Value: BigInteger; var Result: BigInteger); inline;
begin
  Result.FSize := Value.FSize;
  Result.FData := Copy(Value.FData);
end;

class function BigInteger.Abs(const Int: BigInteger): BigInteger;
begin
  ShallowCopy(Int, Result);
  Result.SetSign(0);
end;

class function BigInteger.Add(const Left, Right: BigInteger): BigInteger;
var
  Res: BigInteger;
  LSize, RSize: Integer;
  SignBit: Integer;
  Comparison: TValueSign;
begin
  if Left.FData = nil then
  begin
    ShallowCopy(Right, Result);
    Exit;
  end
  else if Right.FData = nil then
  begin
    ShallowCopy(Left, Result);
    Exit;
  end;

  LSize := SizeBitsOf(Left.FSize);
  RSize := SizeBitsOf(Right.FSize);
  Res.MakeSize(IntMax(LSize, RSize) + 1);

  if ((Left.FSize xor Right.FSize) and SignMask) = 0 then
  begin
    // Same sign: add both magnitudes and transfer sign.
    FInternalAdd(PLimb(Left.FData), PLimb(Right.FData), PLimb(Res.FData), LSize, RSize);
    SignBit := SignBitOf(Left.FSize);
  end
  else
  begin
    Comparison := InternalCompare(PLimb(Left.FData), PLimb(Right.FData), Left.FSize and SizeMask, Right.FSize and SizeMask);
    case Comparison of
      -1: // Left < Right
        begin
          FInternalSubtract(PLimb(Right.FData), PLimb(Left.FData), PLimb(Res.FData), RSize, LSize);
          SignBit := SignBitOf(Right.FSize);
        end;
      1: // Left > Right
        begin
          FInternalSubtract(PLimb(Left.FData), PLimb(Right.FData), PLimb(Res.FData), LSize, RSize);
          SignBit := SignBitOf(Left.FSize);
        end;
      else // Left = Right
        begin
          // Left and Right have equal magnitude but different sign, so return 0.
          ShallowCopy(Zero, Result);
          Exit;
        end;
    end;
  end;
  Res.FSize := (Res.FSize and SizeMask) or SignBit;
  Res.Compact;
  ShallowCopy(Res, Result);
end;

class operator BigInteger.Add(const Left, Right: BigInteger): BigInteger;
begin
  Result := Add(Left, Right);
end;

class procedure BigInteger.Binary;
begin
  FBase := 2;
end;

class procedure BigInteger.InternalAnd(Left, Right, Result: PLimb; LSize, RSize: Integer);
{$IFDEF PUREPASCAL}
var
  I: Integer;
begin
  if LSize < RSize then
    RSize := LSize;
  for I := 0 to RSize - 1 do
    Result[I] := Left[I] and Right[I];
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     EBX,RSize
        MOV     EDI,LSize

        CMP     EDI,EBX
        JAE     @SkipSwap
        XCHG    EBX,EDI
        XCHG    EAX,EDX

@SkipSwap:

        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     ESI,[EAX]
        AND     ESI,[EDX]
        MOV     [ECX],ESI

        MOV     ESI,[EAX + CLimbSize]
        AND     ESI,[EDX + CLimbSize]
        MOV     [ECX + CLimbSize],ESI

        MOV     ESI,[EAX + 2*CLimbSize]
        AND     ESI,[EDX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],ESI

        MOV     ESI,[EAX + 3*CLimbSize]
        AND     ESI,[EDX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],ESI

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     EDX,[EDX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @MainLoop

@MainTail:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     EDX,[EDX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@JumpsMain]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump tables manually, with NOPs.

@JumpsMain:

        DD      @Exit
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     ESI,[EAX - 3*CLimbSize]
        AND     ESI,[EDX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],ESI

@Main2:

        MOV     ESI,[EAX - 2*CLimbSize]
        AND     ESI,[EDX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],ESI

@Main1:

        MOV     ESI,[EAX - CLimbSize]
        AND     ESI,[EDX - CLimbSize]
        MOV     [ECX - CLimbSize],ESI

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        MOV     R10D,RSize

        CMP     R9D,R10D
        JAE     @SkipSwap
        XCHG    R10D,R9D
        XCHG    RCX,RDX

@SkipSwap:

        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     RAX,[RCX]
        AND     RAX,[RDX]
        MOV     [R8],RAX
        MOV     RAX,[RCX + DLimbSize]
        AND     RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX
        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @MainLoop

@MainTail:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     RDX,[RDX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@JumpsMain]
        JMP     [R10 + R9*TYPE Pointer]

        // Align jump table manually, with NOPs

        NOP
        NOP
        NOP

@JumpsMain:

        DQ      @Exit
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     EAX,[RCX - 3*CLimbSize]
        AND     EAX,[RDX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[RCX - 2*CLimbSize]
        AND     EAX,[RDX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[RCX - CLimbSize]
        AND     EAX,[RDX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@Exit:

end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

class procedure BigInteger.InternalXor(Left, Right, Result: PLimb; LSize, RSize: Integer);
{$IFDEF PUREPASCAL}
var
  I: Integer;
  P: PLimb;
begin
  if LSize < RSize then
  begin
    // Swap left and right pointers and sizes.
    I := LSize;
    LSize := RSize;
    RSize := I;
    P := Left;
    Left := Right;
    Right := P;
  end;
  for I := 0 to RSize - 1 do
    Result[I] := Left[I] xor Right[I];
  for I := RSize to LSize - 1 do
    Result[I] := Left[I];
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     EBX,RSize
        MOV     EDI,LSize

        CMP     EDI,EBX
        JAE     @SkipSwap
        XCHG    EBX,EDI
        XCHG    EAX,EDX

@SkipSwap:

        SUB     EDI,EBX
        PUSH    EDI                             // Number of "tail" loops
        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     ESI,[EAX]
        XOR     ESI,[EDX]
        MOV     [ECX],ESI

        MOV     ESI,[EAX + CLimbSize]
        XOR     ESI,[EDX + CLimbSize]
        MOV     [ECX + CLimbSize],ESI

        MOV     ESI,[EAX + 2*CLimbSize]
        XOR     ESI,[EDX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],ESI

        MOV     ESI,[EAX + 3*CLimbSize]
        XOR     ESI,[EDX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],ESI

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     EDX,[EDX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @MainLoop

@MainTail:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     EDX,[EDX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@JumpsMain]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump table manually, with NOPs

        NOP

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     ESI,[EAX - 3*CLimbSize]
        XOR     ESI,[EDX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],ESI

@Main2:

        MOV     ESI,[EAX - 2*CLimbSize]
        XOR     ESI,[EDX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],ESI

@Main1:

        MOV     ESI,[EAX - CLimbSize]
        XOR     ESI,[EDX - CLimbSize]
        MOV     [ECX - CLimbSize],ESI

@DoRestLoop:

        XOR     EDX,EDX
        POP     EBX
        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CunrollShift
        JE      @RestLast3

@RestLoop:

        MOV     EDX,[EAX]
        MOV     [ECX],EDX

        MOV     EDX,[EAX + CLimbSize]
        MOV     [ECX + CLimbSize],EDX

        MOV     EDX,[EAX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],EDX

        MOV     EDX,[EAX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],EDX

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @RestLoop

@RestLast3:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@RestJumps]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump table manually, with NOPs.

        NOP
        NOP

@RestJumps:

        DD      @Exit
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EDX,[EAX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],EDX

@Rest2:

        MOV     EDX,[EAX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],EDX

@Rest1:

        MOV     EDX,[EAX - CLimbSize]
        MOV     [ECX - CLimbSize],EDX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        MOV     R10D,RSize

        CMP     R9D,R10D
        JAE     @SkipSwap
        XCHG    R10D,R9D
        XCHG    RCX,RDX

@SkipSwap:

        SUB     R9D,R10D
        PUSH    R9
        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     RAX,[RCX]
        XOR     RAX,[RDX]
        MOV     [R8],RAX

        MOV     RAX,[RCX + DLimbSize]
        XOR     RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @MainLoop

@MainTail:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     RDX,[RDX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@JumpsMain]
        JMP     [R10 + R9*TYPE Pointer]

@JumpsMain:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     EAX,[RCX - 3*CLimbSize]
        XOR     EAX,[RDX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[RCX - 2*CLimbSize]
        XOR     EAX,[RDX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[RCX - CLimbSize]
        XOR     EAX,[RDX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@DoRestLoop:

        POP     R10
        TEST    R10D,R10D
        JE      @Exit
        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @RestLast3

@RestLoop:

        MOV     RAX,[RCX]
        MOV     [R8],RAX

        MOV     RAX,[RCX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @RestLoop

@RestLast3:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@RestJumps]
        JMP     [R10 + R9*TYPE Pointer]

@RestJumps:

        DQ      @Exit
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     EAX,[RCX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[RCX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[RCX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@Exit:

end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

class procedure BigInteger.InternalOr(Left, Right, Result: PLimb; LSize, RSize: Integer);
{$IFDEF PUREPASCAL}
var
  I: Integer;
  P: PLimb;
begin
  if LSize < RSize then
  begin
    // Swap left and right pointers and sizes.
    I := LSize;
    LSize := RSize;
    RSize := I;
    P := Left;
    Left := Right;
    Right := P;
  end;
  for I := 0 to RSize - 1 do
    Result[I] := Left[I] or Right[I];
  for I := RSize to LSize - 1 do
    Result[I] := Left[I];
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     EBX,RSize
        MOV     EDI,LSize

        CMP     EDI,EBX
        JAE     @SkipSwap
        XCHG    EBX,EDI
        XCHG    EAX,EDX

@SkipSwap:

        SUB     EDI,EBX
        PUSH    EDI                             // Number of "rest" loops
        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     ESI,[EAX]
        OR      ESI,[EDX]
        MOV     [ECX],ESI

        MOV     ESI,[EAX + CLimbSize]
        OR      ESI,[EDX + CLimbSize]
        MOV     [ECX + CLimbSize],ESI

        MOV     ESI,[EAX + 2*CLimbSize]
        OR      ESI,[EDX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],ESI

        MOV     ESI,[EAX + 3*CLimbSize]
        OR      ESI,[EDX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],ESI

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     EDX,[EDX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @MainLoop

@MainTail:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     EDX,[EDX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@JumpsMain]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump table manually, with NOPs

        NOP

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     ESI,[EAX - 3*CLimbSize]
        OR      ESI,[EDX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],ESI

@Main2:

        MOV     ESI,[EAX - 2*CLimbSize]
        OR      ESI,[EDX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],ESI

@Main1:

        MOV     ESI,[EAX - CLimbSize]
        OR      ESI,[EDX - CLimbSize]
        MOV     [ECX - CLimbSize],ESI

@DoRestLoop:

        XOR     EDX,EDX
        POP     EBX
        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CUnrollShift
        JE      @RestLast3

@RestLoop:

        MOV     EDX,[EAX]
        MOV     [ECX],EDX

        MOV     EDX,[EAX + CLimbSize]
        MOV     [ECX + CLimbSize],EDX

        MOV     EDX,[EAX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],EDX

        MOV     EDX,[EAX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],EDX

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @RestLoop

@RestLast3:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@RestJumps]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump table manually, with NOPs.

        NOP
        NOP

@RestJumps:

        DD      @Exit
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EDX,[EAX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],EDX

@Rest2:

        MOV     EDX,[EAX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],EDX

@Rest1:

        MOV     EDX,[EAX - CLimbSize]
        MOV     [ECX - CLimbSize],EDX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        MOV     R10D,RSize

        CMP     R9D,R10D
        JAE     @SkipSwap
        XCHG    R10D,R9D
        XCHG    RCX,RDX

@SkipSwap:

        SUB     R9D,R10D
        PUSH    R9
        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     RAX,[RCX]
        OR      RAX,[RDX]
        MOV     [R8],RAX

        MOV     RAX,[RCX + DLimbSize]
        OR      RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @MainLoop

@MainTail:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     RDX,[RDX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@JumpsMain]
        JMP     [R10 + R9*TYPE Pointer]

        // Align jump table manually, with NOPs.

        DB      $90,$90,$90,$90,$90,$90

@JumpsMain:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     EAX,[RCX - 3*CLimbSize]
        OR      EAX,[RDX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[RCX - 2*CLimbSize]
        OR      EAX,[RDX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[RCX - CLimbSize]
        OR      EAX,[RDX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@DoRestLoop:

        POP     R10
        TEST    R10D,R10D
        JE      @Exit
        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @RestLast3

@RestLoop:

        MOV     RAX,[RCX]
        MOV     [R8],RAX

        MOV     RAX,[RCX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @RestLoop

@RestLast3:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@RestJumps]
        JMP     [R10 + R9*TYPE Pointer]

        // Align jump table manually, with NOPs.

        // -- Aligned.

@RestJumps:

        DQ      @Exit
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     EAX,[RCX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[RCX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[RCX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@Exit:

end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

class procedure BigInteger.InternalAndNot(Left, Right, Result: PLimb; LSize, RSize: Integer);
{$IFDEF PUREPASCAL}
var
  I: Integer;
begin

  // Note: AndNot is - of course - not commutative.
  if LSize < RSize then
    RSize := LSize;
  for I := 0 to RSize - 1 do
    Result[I] := not Right[I] and Left[I];
  for I := RSize to LSize - 1 do
    Result[I] := Left[I];
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     EBX,RSize
        MOV     EDI,LSize

        CMP     EDI,EBX
        JAE     @SkipSwap
        MOV     EBX,EDI

@SkipSwap:

        SUB     EDI,EBX
        PUSH    EDI                             // Number of "rest" loops
        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     ESI,[EDX]
        NOT     ESI
        AND     ESI,[EAX]
        MOV     [ECX],ESI

        MOV     ESI,[EDX + CLimbSize]
        NOT     ESI
        AND     ESI,[EAX + CLimbSize]
        MOV     [ECX + CLimbSize],ESI

        MOV     ESI,[EDX + 2*CLimbSize]
        NOT     ESI
        AND     ESI,[EAX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],ESI

        MOV     ESI,[EDX + 3*CLimbSize]
        NOT     ESI
        AND     ESI,[EAX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],ESI

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     EDX,[EDX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @MainLoop

@MainTail:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     EDX,[EDX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@JumpsMain]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump table manually, with NOPs

        NOP
        NOP

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     ESI,[EDX - 3*CLimbSize]
        NOT     ESI
        AND     ESI,[EAX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],ESI

@Main2:

        MOV     ESI,[EDX - 2*CLimbSize]
        NOT     ESI
        AND     ESI,[EAX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],ESI

@Main1:

        MOV     ESI,[EDX - CLimbSize]
        NOT     ESI
        AND     ESI,[EAX - CLimbSize]
        MOV     [ECX - CLimbSize],ESI

@DoRestLoop:

        XOR     EDX,EDX
        POP     EBX
        MOV     EDI,EBX
        AND     EDI,CUnrollMask
        SHR     EBX,CUnrollShift
        JE      @RestLast3

@RestLoop:

        //      X AND NOT 0 = X AND -1 = X
        MOV     EDX,[EAX]
        MOV     [ECX],EDX

        MOV     EDX,[EAX + CLimbSize]
        MOV     [ECX + CLimbSize],EDX

        MOV     EDX,[EAX + 2*CLimbSize]
        MOV     [ECX + 2*CLimbSize],EDX

        MOV     EDX,[EAX + 3*CLimbSize]
        MOV     [ECX + 3*CLimbSize],EDX

        LEA     EAX,[EAX + 4*CLimbSize]
        LEA     ECX,[ECX + 4*CLimbSize]
        DEC     EBX
        JNE     @RestLoop

@RestLast3:

        LEA     EAX,[EAX + EDI*CLimbSize]
        LEA     ECX,[ECX + EDI*CLimbSize]
        LEA     EBX,[@RestJumps]
        JMP     [EBX + EDI*TYPE Pointer]

        // Align jump table manually, with NOPs.

@RestJumps:

        DD      @Exit
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EDX,[EAX - 3*CLimbSize]
        MOV     [ECX - 3*CLimbSize],EDX

@Rest2:

        MOV     EDX,[EAX - 2*CLimbSize]
        MOV     [ECX - 2*CLimbSize],EDX

@Rest1:

        MOV     EDX,[EAX - CLimbSize]
        MOV     [ECX - CLimbSize],EDX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        MOV     R10D,RSize

        CMP     R9D,R10D
        JAE     @SkipSwap
        MOV     R10D,R9D

@SkipSwap:

        SUB     R9D,R10D
        PUSH    R9
        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @MainTail

@MainLoop:

        MOV     RAX,[RDX]
        NOT     RAX
        AND     RAX,[RCX]
        MOV     [R8],RAX

        MOV     RAX,[RDX + DLimbSize]
        NOT     RAX
        AND     RAX,[RCX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @MainLoop

@MainTail:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     RDX,[RDX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@JumpsMain]
        JMP     [R10 + R9*TYPE Pointer]

        // Align jump table manually, with NOPs.

        DB      $90,$90,$90

@JumpsMain:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     EAX,[RDX - 3*CLimbSize]
        NOT     EAX
        AND     EAX,[RCX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[RDX - 2*CLimbSize]
        NOT     EAX
        AND     EAX,[RCX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[RDX - CLimbSize]
        NOT     EAX
        AND     EAX,[RCX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@DoRestLoop:

        POP     R10
        TEST    R10D,R10D
        JE      @Exit
        MOV     R9D,R10D
        AND     R9D,CUnrollMask
        SHR     R10D,CUnrollShift
        JE      @RestLast3

@RestLoop:

        //      X AND NOT 0 = X AND -1 = X

        MOV     RAX,[RCX]
        MOV     RDX,[RCX + DLimbSize]
        MOV     [R8],RAX
        MOV     [R8 + DLimbSize],RDX

        LEA     RCX,[RCX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]
        DEC     R10D
        JNE     @RestLoop

@RestLast3:

        LEA     RCX,[RCX + R9*CLimbSize]
        LEA     R8,[R8 + R9*CLimbSize]
        LEA     R10,[@RestJumps]
        JMP     [R10 + R9*TYPE Pointer]

        // Align jump table manually, with NOPs.

        DB      $90,$90

@RestJumps:

        DQ      @Exit
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     EAX,[RCX - 3*CLimbSize]
        MOV     [R8 - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[RCX - 2*CLimbSize]
        MOV     [R8 - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[RCX - CLimbSize]
        MOV     [R8 - CLimbSize],EAX

@Exit:

end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

class procedure BigInteger.InternalNotAnd(Left, Right, Result: PLimb; LSize, RSize: Integer);
begin
  InternalAndNot(Right, Left, Result, RSize, LSize);
end;

class operator BigInteger.BitwiseAnd(const Left, Right: BigInteger): BigInteger;
begin

  // Special handling for 0.
  if (Left.FData = nil)  or (Right.FData = nil) then
  begin
    Result.FData := nil;
    Result.FSize := 0;
    Exit;
  end;

  InternalBitwise(Left, Right, Result, InternalAnd, InternalOr, InternalAndNot);
end;

class operator BigInteger.BitwiseOr(const Left, Right: BigInteger): BigInteger;
begin

  // Special handling for 0.
  if Left.FData = nil then
  begin
    ShallowCopy(Right, Result);
    Exit;
  end
  else if Right.FData = nil then
  begin
    ShallowCopy(Left, Result);
    Exit;
  end;

  InternalBitwise(Left, Right, Result, InternalOr, InternalAnd, InternalNotAnd);
end;

class operator BigInteger.BitwiseXor(const Left, Right: BigInteger): BigInteger;
begin

  // Special handling for 0.
  if Left.FData = nil then
  begin
    ShallowCopy(Right, Result);
    Exit;
  end
  else if Right.FData = nil then
  begin
    ShallowCopy(Left, Result);
    Exit;
  end;

  InternalBitwise(Left, Right, Result, InternalXor, InternalXor, InternalXor);
end;

function BigInteger.Clone: BigInteger;
begin
  DeepCopy(Self, Result);
end;

function FindSize(Limb: PLimb; Size: Integer): Integer;
{$IFDEF PUREPASCAL}
begin
  while (Size > 0) and (Limb[Size - 1] = 0) do
    Dec(Size);
  Result := Size;
end;
{$ELSE}
{$IFDEF WIN32}
asm

        LEA     EAX,[EAX + EDX * CLimbSize - CLimbSize]
        XOR     ECX,ECX

@Loop:

        CMP     [EAX],ECX
        JNE     @Exit
        LEA     EAX,[EAX - CLimbSize]
        DEC     EDX
        JNE     @Loop

@Exit:

        MOV     EAX,EDX

end;
{$ELSE !WIN32}
asm

        LEA     RAX,[RCX + RDX * CLimbSize - CLimbSize]
        XOR     ECX,ECX

@Loop:

        CMP     [RAX],ECX
        JNE     @Exit
        LEA     RAX,[RAX - CLimbSize]
        DEC     EDX
        JNE     @Loop

@Exit:

        MOV     EAX,EDX

end;
{$ENDIF !WIN32}
{$ENDIF}

procedure BigInteger.Compact;
var
  NewSize: Integer;
begin
  if FData = nil then
  begin
    FSize := 0;
    Exit;
  end;

  NewSize := FindSize(PLimb(FData), FSize and SizeMask);
  if NewSize < (FSize and SizeMask) then
  begin
    if NewSize = 0 then
    begin
      FSize := 0;
      FData := nil;
    end
    else
    begin
      FSize := SignBitOf(FSize) or NewSize;
    {$IFDEF RESETSIZE}
      SetLength(FData, (NewSize + 4) and CapacityMask);
    {$ENDIF}
    end;
  end;
end;

class function BigInteger.Compare(const Left, Right: BigInteger): TValueSign;
begin
  Result := InternalCompare(PLimb(Left.FData), PLimb(Right.FData), Left.FSize and SizeMask, Right.FSize and SizeMask);
  if Left.FSize < 0 then
    if Right.FSize < 0 then
      Result := -Result
    else
      Result := -1
  else if Right.FSize < 0 then
    Result := 1;
end;

constructor BigInteger.Create(const Int: Integer);
begin
  Create(Cardinal(System.Abs(Int)));
  if Int < 0 then
    FSize := FSize or SignMask;
end;

constructor BigInteger.Create(const Int: BigInteger);
begin
  Self.FSize := Int.FSize;
  Self.FData := Int.FData;
end;

constructor BigInteger.Create(const Data: TMagnitude; Negative: Boolean);
begin
  FSize := Length(Data) or (Ord(Negative) * SignMask);
  FData := Copy(Data);
  Compact;
end;

constructor BigInteger.Create(const Int: UInt64);
begin
  FData := nil;
  if Int <> 0 then
  begin
    if Int > High(UInt32) then
      FSize := CUInt64Limbs
    else
      FSize := 1;
    SetLength(FData, 4);
    Move(Int, FData[0], SizeOf(Int));
  end
  else
  begin
    FData := nil;
    FSize := 0;
  end;
end;

const
  CMantissaBits = 52;
  CMaxShift     = 62;

function IsDenormal(const ADouble: Double): Boolean; inline;
begin
  Result := (PUInt64(@ADouble)^ and (UInt64($7FF) shl CMantissaBits)) = 0
end;

function MantissaOf(const ADouble: Double): UInt64; inline;
begin
  Result := PUInt64(@ADouble)^ and (UInt64(-1) shr (64 - CMantissaBits));
  if not IsDenormal(ADouble) then
    Result := Result or (UInt64(1) shl CMantissaBits);
end;

function ExponentOf(const ADouble: Double): Integer;
begin
  Result := ((PUInt64(@ADouble)^ shr CMantissaBits) and $7FF) - 1023;
  if Result = -1023 then
    Result := -1022;
end;

function SignOf(const ADouble: Double): Boolean;
begin
  Result := PInt64(@ADouble)^ < 0;
end;

constructor BigInteger.Create(const ADouble: Double);
var
  Exponent: Integer;
  Mantissa: UInt64;
  Sign, Guard, Round, Sticky: Boolean;
  Shift: Integer;
  ZeroExponentLimit: Integer;
begin
  FSize := 0;
//  FData := nil;

  // Error for special values.
  if IsNan(ADouble) or IsInfinite(ADouble) then
    Error(ecOverflow);

  // Get the required values from TDoubleHelper.
  Mantissa := MantissaOf(ADouble);
  Exponent := ExponentOf(ADouble);
  Sign := SignOf(ADouble);

  // Make 0 for denormal values and values < 0.5.
  if FRoundingMode <> rmTruncate then
    ZeroExponentLimit := -1
  else
    ZeroExponentLimit := 0;

  // Denormals and values with small exponent convert to 0.
  if IsDenormal(ADouble) or (Exponent < ZeroExponentLimit) then
  begin
    Self := BigInteger.Zero;
    Exit;
  end;

  // Internal shift of the mantissa.
  Shift := Exponent;
  if Shift > CMaxShift then
    Shift := CMaxShift;

  // Guard, Round and Sticky bits are used to determine rounding, see comments in function AsDouble.
  Guard := False;
  Round := False;
  Sticky := False;
  if (FRoundingMode <> rmTruncate) and (Exponent < CMantissaBits) then
  begin
    // Round anything with a fraction >= 0.5 away from 0. No Round and Sticky bits required.
    Guard := ((UInt64(1) shl (CMantissaBits - 1 - Exponent)) and Mantissa) <> 0;

    if FRoundingMode = rmRound then
    begin
      // Only if full rounding (like System.Round() performs) is required: Round any fraction > 0.5 away from 0.
      Round := ((UInt64(1) shl (CMantissaBits - 2 - Exponent)) and Mantissa) <> 0;
      Sticky := ((Int64(-1) shr (Exponent + (64 - CMantissaBits + 2))) and Mantissa) <> 0;
    end;
  end;

  // Shift mantissa left or right to get the most bits out of it before converting to BigInteger.
  if Shift > CMantissaBits then
    Mantissa := Mantissa shl (Shift - CMantissaBits)
  else
    Mantissa := Mantissa shr (CMantissaBits - Shift);

  // Round shifted mantissa.
  if ((RoundingMode = rmSchool) and Guard) or
     ((RoundingMode = rmRound) and (Guard and (Round or Sticky))) then
    Inc(Mantissa);

  // Turn shifted mantissa (a UInt64) into BigInteger.
  Self := 0;
  Self.Create(UInt64(Mantissa));

  // Shift left by the restant value of the exponent.
  if Exponent > Shift then
    Self := Self shl (Exponent - Shift);
  if Sign then
    FSize := FSize or SignMask;

end;

// Bytes are considered to contain value in two's complement format.
constructor BigInteger.Create(const Bytes: array of Byte);
var
  Limbs: TMagnitude;
  Negative: Boolean;
begin
  Negative := Bytes[High(Bytes)] >= $80;
  SetLength(Limbs, (Length(Bytes) + 3) div 4);
  if Negative then
    Limbs[High(Limbs)] := TLimb(-1);
  Move((@Bytes[0])^, PLimb(Limbs)^, Length(Bytes));
  if Negative then
    InternalNegate(PLimb(Limbs), PLimb(Limbs), Length(Limbs));
  Create(Limbs, Negative);
end;

// This assumes sign-magnitude format.
constructor BigInteger.Create(const Limbs: array of TLimb; Negative: Boolean);
var
  LSize: Integer;
begin
  LSize := Length(Limbs);
  MakeSize(LSize);
  FSize := LSize or (Ord(Negative) * SignMask);
  if LSize > 0 then
    CopyLimbs(@Limbs[0], PLimb(FData), LSize);
  Compact;
end;

constructor BigInteger.Create(const Int: Int64);
begin
  Create(UInt64(System.Abs(Int)));
  if Int < 0 then
    FSize := FSize or SignMask;
end;

constructor BigInteger.Create(const Int: Cardinal);
begin
  if Int <> 0 then
  begin
    FSize := 1;
    SetLength(FData, 4);
    FData[0] := Int;
  end
  else
  begin
    FData := nil;
    FSize := 0;
  end;
end;

constructor BigInteger.Create(NumBits: Integer; const Random: IRandom);
var
  Bytes: TArray<Byte>;
  Bits: Byte;
begin
  if NumBits = 0 then
  begin
    ShallowCopy(Zero, Self);
    Exit;
  end;

  SetLength(Bytes, (NumBits + 7) shr 3 + 1);
  Random.NextBytes(Bytes);

  // One byte too many was allocated, to get a top byte of 0, i.e. always positive.
  Bytes[High(Bytes)] := 0;

  // Set bits above required bit length to 0.
  Bits := NumBits and $07;
  Bytes[High(Bytes) - 1] := Bytes[High(Bytes) - 1] and ($7F shr (7 - Bits));
  Create(Bytes);
  Compact;
//  Assert(BitLength <= Numbits, Format('BitLength (%d) >= NumBits (%d): %s', [BitLength, NumBits, Self.ToString(2)]));
end;

function BigInteger.GetAllocated: Integer;
begin
  Result := Length(FData);
end;

function BigInteger.IsEven: Boolean;
begin
  Result := IsZero or ((FData[0] and 1) = 0);
end;

function BigInteger.IsNegative: Boolean;
begin
  Result := FSize < 0;
end;

function BigInteger.IsOne: Boolean;
begin
  Result := (FSize = 1) and (FData[0] = 1);
end;

function BigInteger.IsPositive: Boolean;
begin
  Result := FSize > 0;
end;

function BigInteger.IsPowerOfTwo: Boolean;
var
  FirstNonZeroIndex: Integer;
  AHigh: Integer;
begin
  AHigh := (FSize and SizeMask) - 1;
  if (FData = nil) or not Velthuis.Numerics.IsPowerOfTwo(FData[AHigh]) then
    Result := False
  else
  begin
    FirstNonZeroIndex := 0;

    // All limbs below top one must be 0
    while FData[FirstNonZeroIndex] = 0 do
      Inc(FirstNonZeroIndex);

    // Top limb must be power of two.
    Result := (FirstNonZeroIndex = AHigh);
  end;
end;

function BigInteger.GetSign: Integer;
begin
  if FData = nil then
  begin
    FSize := 0;
    Exit(0);
  end;

  if FSize > 0 then
    Result := 1
  else
    Result := -1
end;

function BigInteger.GetSize: Integer;
begin
  if FData = nil then
    FSize := 0;
  Result := FSize and SizeMask;
end;

function BigInteger.Data: PLimb;
begin
  Result := PLimb(FData);
end;

class operator BigInteger.GreaterThan(const Left, Right: BigInteger): Boolean;
begin
  Result := Compare(Left, Right) > 0;
//  Result := not (Left <= Right);
end;

class operator BigInteger.GreaterThanOrEqual(const Left, Right: BigInteger): Boolean;
begin
  Result := Compare(left, Right) >= 0;
end;

// http://en.wikipedia.org/wiki/Binary_GCD_algorithm
class function BigInteger.GreatestCommonDivisor(const Left, Right: BigInteger): BigInteger;
var
  Shift: Integer;
  ALeft, ARight, Temp: BigInteger;
begin
  // GCD(left, 0) = left; GCD(0, right) = right; GCD(0, 0) = 0
  if Left.IsZero then
    Exit(Abs(Right));
  if Right.IsZero then
    Exit(Abs(Left));

  // Let Shift = Log2(K), where K is the greatest power of 2 dividing both ALeft and ARight.
  ALeft := Abs(Left);
  ARight := Abs(Right);
  Shift := 0;
  while ALeft.IsEven and ARight.IsEven do
  begin
    ALeft := ALeft shr 1;
    ARight := ARight shr 1;
    Inc(Shift);
  end;

  while ALeft.IsEven do
    ALeft := ALeft shr 1;

  // Now, ALeft is always odd.
  repeat
    // Remove all factors of 2 in ARight, since they are not in common.
    // ARight is not 0, so the loop will terminate
    while ARight.IsEven do
      ARight := ARight shr 1;

    // ALeft and ARight are both odd. Swap if necessary, so that ALeft <= ARight,
    // then set ARight to ARight - ALeft (which is even).
    if ALeft > ARight then
    begin
      // Swap ALeft and ALeft.
      Temp := ALeft;
      ALeft := ARight;
      ARight := Temp;
    end;
    ARight := ARight - ALeft;
  until ARight = 0;

  // Restore common factors of 2.
  Result := ALeft shl Shift;
end;

class procedure BigInteger.Hexadecimal;
begin
  FBase := 16;
end;

class procedure BigInteger.Hex;
begin
  FBase := 16;
end;

class operator BigInteger.Implicit(const Int: Cardinal): BigInteger;
begin
  Result := BigInteger.Create(Int);
end;

class operator BigInteger.Implicit(const Int: Integer): BigInteger;
begin
  Result := BigInteger.Create(Int);
end;

class operator BigInteger.Implicit(const Int: UInt64): BigInteger;
begin
  Result := BigInteger.Create(Int);
end;

class operator BigInteger.Implicit(const Int: Int64): BigInteger;
begin
  Result := BigInteger.Create(Int);
end;

class constructor BigInteger.Initialize;
begin
  MinusOne := -1;
  Zero.FSize := 0;
  Zero.FData := nil;
  One := 1;
  Ten := 10;
  FBase := 10;
  FRoundingMode := rmTruncate;
{$IFNDEF PUREPASCAL}
  // See comments in BigInteger.InternalAddEmu.
  BigInteger.DetectPartialFlagsStall;
{$ELSE}
  FInternalAdd := InternalAddPurePascal;
  FInternalSubtract := InternalSubtractPurePascal;
{$ENDIF}
end;

class operator BigInteger.IntDivide(const Left, Right: BigInteger): BigInteger;
begin
  Result := Divide(Left, Right);
end;

class operator BigInteger.IntDivide(const Left: BigInteger; Right: UInt16): BigInteger;
begin
  Result := Divide(Left, Right);
end;

class operator BigInteger.IntDivide(const Left: BigInteger; Right: UInt32): BigInteger;
begin
  Result := Divide(Left, Right);
end;

{$IFNDEF PUREPASCAL}
class procedure BigInteger.InternalAddModified(Left, Right, Result: PLimb; LSize, RSize: Integer);
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX
        MOV     EDI,EDX
        MOV     EBX,ECX

        MOV     ECX,RSize
        MOV     EDX,LSize

        CMP     EDX,ECX
        JAE     @SkipSwap
        XCHG    ECX,EDX
        XCHG    ESI,EDI

@SkipSwap:

        SUB     EDX,ECX
        PUSH    EDX
        XOR     EDX,EDX

        XOR     EAX,EAX

        MOV     EDX,ECX
        AND     EDX,CunrollMask
        SHR     ECX,CunrollShift

        CLC
        JE      @MainTail

@MainLoop:

        MOV     EAX,[ESI]
        ADC     EAX,[EDI]
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        ADC     EAX,[EDI + CLimbSize]
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        ADC     EAX,[EDI + 2*CLimbSize]
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        ADC     EAX,[EDI + 3*CLimbSize]
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + CUnrollIncrement*CLimbSize]
        LEA     EDI,[EDI + CUnrollIncrement*CLimbSize]
        LEA     EBX,[EBX + CUnrollIncrement*CLimbSize]

        LEA     ECX,[ECX - 1]
        JECXZ   @Maintail
        JMP     @Mainloop

@MainTail:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EDI,[EDI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@JumpsMain]
        JMP     [ECX + EDX*TYPE Pointer]

        // Align jump table manually, with NOPs.

        NOP

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     EAX,[ESI - 3*CLimbSize]
        ADC     EAX,[EDI - 3*CLimbSize]
        MOV     [EBX - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[ESI - 2*CLimbSize]
        ADC     EAX,[EDI - 2*CLimbSize]
        MOV     [EBX - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[ESI - CLimbSize]
        ADC     EAX,[EDI - CLimbSize]
        MOV     [EBX - CLimbSize],EAX

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDI,EDI

        POP     ECX
        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        JECXZ   @RestLastN

@RestLoop:

        MOV     EAX,[ESI]
        ADC     EAX,EDI
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + CUnrollIncrement*CLimbSize]
        LEA     EBX,[EBX + CUnrollIncrement*CLimbSize]

        LEA     ECX,[ECX - 1]
        JECXZ   @RestLastN
        JMP     @RestLoop

@RestLastN:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@RestJumps]
        JMP     [ECX + EDX*TYPE Pointer]

        // Align jump table manually, with NOPs.

        NOP

@RestJumps:

        DD      @LastLimb
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EAX,[ESI - 3*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[ESI - 2*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[ESI - CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX - CLimbSize],EAX

@LastLimb:

        ADC     EDI,EDI
        MOV     [EBX],EDI

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        MOV     R10,RCX
        MOV     ECX,RSize

        CMP     R9D,ECX
        JAE     @SkipSwap
        XCHG    ECX,R9D
        XCHG    R10,RDX

@SkipSwap:

        SUB     R9D,ECX
        PUSH    R9

        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail

@MainLoop:

        MOV     RAX,[R10]
        ADC     RAX,[RDX]
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize]
        ADC     RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        LEA     RCX,[RCX - 1]
        JECXZ   @MainTail
        JMP     @MainLoop

@MainTail:

        LEA     RCX,[@MainJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // Align jump table manually, with NOPs.

        DB      $90,$90,$90,$90

@MainJumps:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     RAX,[R10]
        ADC     RAX,[RDX]
        MOV     [R8],RAX

        MOV     EAX,[R10 + 2*CLimbSize]
        ADC     EAX,[RDX + 2*CLimbSize]
        MOV     [R8 + 2*CLimbSize],EAX

        LEA     R10,[R10 + 3*CLimbSize]
        LEA     RDX,[RDX + 3*CLimbSize]
        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @DoRestLoop

@Main2:

        MOV     RAX,[R10]
        ADC     RAX,[RDX]
        MOV     [R8],RAX

        LEA     R10,[R10 + 2*CLimbSize]
        LEA     RDX,[RDX + 2*CLimbSize]
        LEA     R8,[R8 + 2*CLimbSize]

        JMP     @DoRestLoop

@Main1:

        MOV     EAX,[R10]
        ADC     EAX,[RDX]
        MOV     [R8],EAX

        LEA     R10,[R10 + CLimbSize]
        LEA     RDX,[RDX + CLimbSize]
        LEA     R8,[R8 + CLimbSize]

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDX,EDX

        POP     RCX
        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        JECXZ   @RestLast3

@RestLoop:

        MOV     RAX,[R10]
        ADC     RAX,RDX
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize]
        ADC     RAX,RDX
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        LEA     RCX,[RCX - 1]
        JECXZ   @RestLast3
        JMP     @RestLoop

@RestLast3:

        LEA     RCX,[@RestJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // If necessary, align second jump table with NOPs.

        DB      $90,$90,$90,$90,$90,$90

@RestJumps:

        DQ      @LastLimb
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     RAX,[R10]
        ADC     RAX,RDX
        MOV     [R8],RAX

        MOV     EAX,[R10 + 2*CLimbSize]
        ADC     EAX,EDX
        MOV     [R8 + 2*CLimbSize],EAX

        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @LastLimb

@Rest2:

        MOV     RAX,[R10]
        ADC     RAX,RDX
        MOV     [R8],RAX

        LEA     R8,[R8 + 2*CLimbSize]

        JMP     @LastLimb

@Rest1:

        MOV     EAX,[R10]
        ADC     EAX,EDX
        MOV     [R8],EAX

        LEA     R8,[R8 + CLimbSize]

@LastLimb:

        ADC     EDX,EDX
        MOV     [R8],EDX

@Exit:

end;
{$ENDIF WIN32/WIN64}

class procedure BigInteger.InternalAddPlain(Left, Right, Result: PLimb; LSize, RSize: Integer);

//==============================================//
//                                              //
// To understand the code, please read this:    //
//                                              //
//   http://stackoverflow.com/q/32084204/95954  //
//                                              //
// especially Peter Cordes' answer:             //
//                                              //
//   http://stackoverflow.com/a/32087095/95954  //
//                                              //
//==============================================//

{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX                         // Left
        MOV     EDI,EDX                         // Right
        MOV     EBX,ECX                         // Result

        MOV     ECX,RSize
        MOV     EDX,LSize

        CMP     EDX,ECX
        JAE     @SkipSwap
        XCHG    ECX,EDX
        XCHG    ESI,EDI

@SkipSwap:

        SUB     EDX,ECX
        PUSH    EDX
        XOR     EDX,EDX

        XOR     EAX,EAX

        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail

@MainLoop:

        MOV     EAX,[ESI]
        ADC     EAX,[EDI]
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        ADC     EAX,[EDI + CLimbSize]
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        ADC     EAX,[EDI + 2*CLimbSize]
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        ADC     EAX,[EDI + 3*CLimbSize]
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + 4*CLimbSize]
        LEA     EDI,[EDI + 4*CLimbSize]
        LEA     EBX,[EBX + 4*CLimbSize]

        DEC     ECX
        JNE     @MainLoop

@MainTail:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EDI,[EDI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@JumpsMain]
        JMP     [ECX + EDX*TYPE Pointer]

        // Align jump table manually, with NOPs. Update if necessary.

        NOP

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     EAX,[ESI - 3*CLimbSize]
        ADC     EAX,[EDI - 3*CLimbSize]
        MOV     [EBX - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[ESI - 2*CLimbSize]
        ADC     EAX,[EDI - 2*CLimbSize]
        MOV     [EBX - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[ESI - CLimbSize]
        ADC     EAX,[EDI - CLimbSize]
        MOV     [EBX - CLimbSize],EAX

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDI,EDI

        POP     ECX
        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        INC     ECX
        DEC     ECX
        JE      @RestLast3              // JECXZ is slower than INC/DEC/JE

@RestLoop:

        MOV     EAX,[ESI]
        ADC     EAX,EDI
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + 4*CLimbSize]
        LEA     EBX,[EBX + 4*CLimbSize]

        DEC     ECX
        JNE     @RestLoop

@RestLast3:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@RestJumps]
        JMP     [ECX + EDX*TYPE Pointer]

        // If necessary, align second jump table with NOPs

        NOP
        NOP
        NOP

@RestJumps:

        DD      @LastLimb
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EAX,[ESI - 3*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[ESI - 2*CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[ESI - CLimbSize]
        ADC     EAX,EDI
        MOV     [EBX - CLimbSize],EAX

@LastLimb:

        ADC     EDI,EDI
        MOV     [EBX],EDI

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        MOV     R10,RCX
        MOV     ECX,RSize

        CMP     R9D,ECX
        JAE     @SkipSwap
        XCHG    ECX,R9D
        XCHG    R10,RDX

@SkipSwap:

        SUB     R9D,ECX
        PUSH    R9

        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail

@MainLoop:

        MOV     RAX,[R10]
        ADC     RAX,[RDX]
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize]
        ADC     RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        DEC     ECX
        JNE     @MainLoop

@MainTail:

        LEA     RCX,[@MainJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // Align jump table. Update if necessary!

        NOP

@MainJumps:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     RAX,[R10]
        ADC     RAX,[RDX]
        MOV     [R8],RAX

        MOV     EAX,[R10 + 2*CLimbSize]
        ADC     EAX,[RDX + 2*CLimbSize]
        MOV     [R8 + 2*CLimbSize],EAX

        LEA     R10,[R10 + 3*CLimbSize]
        LEA     RDX,[RDX + 3*CLimbSize]
        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @DoRestLoop

@Main2:

        MOV     RAX,[R10]
        ADC     RAX,[RDX]
        MOV     [R8],RAX

        LEA     R10,[R10 + 2*CLimbSize]
        LEA     RDX,[RDX + 2*CLimbSize]
        LEA     R8,[R8 + 2*CLimbSize]

        JMP     @DoRestLoop

@Main1:

        MOV     EAX,[R10]
        ADC     EAX,[RDX]
        MOV     [R8],EAX

        LEA     R10,[R10 + CLimbSize]
        LEA     RDX,[RDX + CLimbSize]
        LEA     R8,[R8 + CLimbSize]

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDX,EDX

        POP     RCX
        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        INC     ECX
        DEC     ECX
        JE      @RestLast3

@RestLoop:

        MOV     RAX,[R10]
        ADC     RAX,RDX
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize]
        ADC     RAX,RDX
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        DEC     ECX
        JNE     @RestLoop

@RestLast3:

        LEA     RCX,[@RestJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // If necessary, align second jump table with NOPs

        // -- Aligned.

@RestJumps:

        DQ      @LastLimb
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     RAX,[R10]
        ADC     RAX,RDX
        MOV     [R8],RAX

        MOV     EAX,[R10 + DLimbSize]
        ADC     EAX,EDX
        MOV     [R8 + DLimbSize],EAX

        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @LastLimb

@Rest2:

        MOV     RAX,[R10]
        ADC     RAX,RDX
        MOV     [R8],RAX

        LEA     R8,[R8 + DLimbSize]

        JMP     @LastLimb

@Rest1:

        MOV     EAX,[R10]
        ADC     EAX,EDX
        MOV     [R8],EAX

        LEA     R8,[R8 + CLimbSize]

@LastLimb:

        ADC     EDX,EDX
        MOV     [R8],EDX

@Exit:

end;
{$ENDIF !WIN32}
{$ENDIF !PUREPASCAL}

{$IFDEF PUREPASCAL}
class procedure BigInteger.InternalAddPurePascal(Left, Right, Result: PLimb; LSize, RSize: Integer);
var
  I: Integer;
  Carry, InterCarry: TLimb;
  PTemp: PLimb;
  LCount, LTail: Integer;
  Sum: TLimb;
{$IFDEF CPUX64}
  Left64, Sum64, Carry64, InterCarry64: UInt64;
{$ELSE}
  Left32: TLimb;
{$ENDIF}
begin
  if LSize < RSize then
  begin
    PTemp := Left;
    Left := Right;
    Right := PTemp;
    I := LSize;
    LSize := RSize;
    RSize := I;
  end;
  // From here on,
  // - Left is larger and LSize is its size,
  // - Right is smaller and RSize is its size.

  // Emulate ADC (add with carry)
  Carry := 0;
  Dec(LSize, RSize);

  LTail := RSize and CUnrollMask;
  LCount := RSize shr CUnrollShift;

{$IFDEF CPUX64}
  Carry64 := 0;
{$ENDIF}
  while LCount > 0 do
  begin
{$IFDEF CPUX64}
    Left64 := PUInt64(Left)[0];
    Sum64 := Left64 + PUInt64(Right)[0];
    InterCarry64 := Ord(Sum64 < Left64);
    Inc(Sum64, Carry64);
    PUInt64(Result)[0] := Sum64;
    Carry64 := InterCarry64 or Ord(Sum64 < Carry64);

    Left64 := PUInt64(Left)[1];
    Sum64 := Left64 + PUInt64(Right)[1];
    InterCarry64 := Ord(Sum64 < Left64);
    Inc(Sum64, Carry64);
    PUInt64(Result)[1] := Sum64;
    Carry64 := InterCarry64 or Ord(Sum64 < Carry64);
{$ELSE !CPUX64}
    Left32 := Left[0];
    Sum := Left32 + Right[0];
    InterCarry := TLimb(Sum < Left32);
    Inc(Sum, Carry);
    Result[0] := Sum;
    Carry := InterCarry or TLimb(Sum < Carry);

    Left32 := Left[1];
    Sum := Left32 + Right[1];
    InterCarry := TLimb(Sum < Left32);
    Inc(Sum, Carry);
    Result[1] := Sum;
    Carry := InterCarry or TLimb(Sum < Carry);

    Left32 := Left[2];
    Sum := Left32 + Right[2];
    InterCarry := TLimb(Sum < Left32);
    Inc(Sum, Carry);
    Result[2] := Sum;
    Carry := InterCarry or TLimb(Sum < Carry);

    Left32 := Left[3];
    Sum := Left32 + Right[3];
    InterCarry := TLimb(Sum < Left32);
    Inc(Sum, Carry);
    Result[3] := Sum;
    Carry := InterCarry or TLimb(Sum < Carry);
{$ENDIF}

    Inc(Left, CUnrollIncrement);
    Inc(Right, CUnrollIncrement);
    Inc(Result, CUnrollIncrement);
    Dec(LCount);
  end;

{$IFDEF CPUX64}
  Carry := Carry64;
{$ENDIF}

  while LTail > 0 do
  begin
    Sum := Left[0] + Right[0];
    InterCarry := TLimb(Sum < Left[0]);
    Inc(Sum, Carry);
    Result[0] := Sum;
    Carry := TLimb(Sum < Carry) or InterCarry;

    Inc(Left);
    Inc(Right);
    Inc(Result);
    Dec(LTail);
  end;

  LTail := LSize and CUnrollMask;
  LCount := LSize shr CunrollShift;

{$IFDEF CPUX64}
  Carry64 := Carry;
{$ENDIF}

  while LCount > 0 do
  begin
{$IFDEF CPUX64}
    Sum64 := PUInt64(Left)[0] + Carry64;
    PUInt64(Result)[0] := Sum64;
    Carry64 := Ord(Sum64 < Carry64);

    Sum64 := PUInt64(Left)[1] + Carry64;
    PUInt64(Result)[1] := Sum64;
    Carry64 := Ord(Sum64 < Carry64);
{$ELSE}
    Sum := Left[0] + Carry;
    Result[0] := Sum;
    Carry := TLimb(Sum < Carry);

    Sum := Left[1] + Carry;
    Result[1] := Sum;
    Carry := TLimb(Sum < Carry);

    Sum := Left[2] + Carry;
    Result[2] := Sum;
    Carry := TLimb(Sum < Carry);

    Sum := Left[3] + Carry;
    Result[3] := Sum;
    Carry := TLimb(Sum < Carry);
{$ENDIF}

    Inc(Left, CUnrollIncrement);
    Inc(Result, CUnrollIncrement);
    Dec(LCount);
  end;

{$IFDEF CPUX64}
  Carry := Carry64;
{$ENDIF}

  while LTail > 0 do
  begin
    Sum := Left[0] + Carry;
    Result[0] := Sum;
    Carry := TLimb(Sum < Carry);

    Inc(Left);
    Inc(Result);
    Dec(LTail);
  end;

  Result[0] := Carry;
end;
{$ENDIF}

class procedure BigInteger.InternalMultiply(Left, Right, Result: PLimb; LSize, RSize: Integer);
{$IFDEF PUREPASCAL}
type
  TUInt64 = packed record
    Lo, Hi: TLimb;
  end;
var
  Product: UInt64;
  LRest, LCount: Integer;
  CurrentRightLimb: TLimb;
  PLeft, PDest, PRight, PDestRowStart: PLimb;
begin
  if RSize > LSize then
  begin
    PDest := Left;
    Left := Right;
    Right := PDest;
    LRest := LSize;
    LSize := RSize;
    RSize := LRest;
  end;

  PRight := Right;
  PDestRowStart := Result;

  PLeft := Left;
  PDest := PDestRowStart;
  Inc(PDestRowStart);
  CurrentRightLimb := PRight^;
  Inc(PRight);
  TUInt64(Product).Hi := 0;
  Dec(RSize);
  LCount := LSize;

  while LCount > 0 do
  begin
    Product := UInt64(PLeft^) * CurrentRightLimb + TUInt64(Product).Hi;
    PDest^ := TUInt64(Product).Lo;
    Inc(PLeft);
    Inc(PDest);
    Dec(LCount);
  end;
  PDest^ := TUInt64(Product).Hi;

  LRest := LSize and CUnrollMask; // Low 2 bits: 0..3.
  LSize := LSize shr CUnrollShift; // Divide by 4.
  while RSize > 0 do
  begin
    PLeft := Left;
    PDest := PDestRowStart;
    Inc(PDestRowStart);
    CurrentRightLimb := PRight^;
    Inc(PRight);

    if CurrentRightLimb <> 0 then
    begin
      TUInt64(Product).Hi := 0;
      LCount := LSize;

      // Inner loop, unrolled.
      while LCount > 0 do
      begin
        Product := UInt64(PLeft[0]) * CurrentRightLimb + PDest[0] + TUInt64(Product).Hi;
        PDest[0] := TUInt64(Product).Lo;
        Product := UInt64(PLeft[1]) * CurrentRightLimb + PDest[1] + TUInt64(Product).Hi;
        PDest[1] := TUInt64(Product).Lo;
        Product := UInt64(PLeft[2]) * CurrentRightLimb + PDest[2] + TUInt64(Product).Hi;
        PDest[2] := TUInt64(Product).Lo;
        Product := UInt64(PLeft[3]) * CurrentRightLimb + PDest[3] + TUInt64(Product).Hi;
        PDest[3] := TUInt64(Product).Lo;

        Inc(PLeft, CUnrollIncrement);
        Inc(PDest, CunrollIncrement);
        Dec(LCount);
      end;

      // Rest loop.
      LCount := LRest;
      while LCount > 0 do
      begin
        Product := UInt64(PLeft^) * CurrentRightLimb + PDest^ + TUInt64(Product).Hi;
        PDest^ := TUInt64(Product).Lo;
        Inc(PLeft);
        Inc(PDest);
        Dec(LCount);
      end;

      // Last (top) limb of this row.
      PDest^ := TUInt64(Product).Hi;
    end;
    Dec(RSize);
  end;
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32)}
var
  SaveResult: PLimb;
  LRest, LCount: Integer;
  PRight, PDestRowStart: PLimb;
  LLeft, LRight: PLimb;
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     SaveResult,ECX

        MOV     ESI,LSize
        MOV     EDI,RSize
        CMP     ESI,EDI
        JA      @SkipSwap

        XCHG    EAX,EDX
        XCHG    ESI,EDI
        MOV     LSize,ESI
        MOV     RSize,EDI

// The longest loop should ideally be unrolled. After this, Left should be longer or same length.

@SkipSwap:

        MOV     LLeft,EAX
        MOV     LRight,EDX

// First loop, setting up first row:

        MOV     PRight,EDX
        MOV     EDI,SaveResult
        MOV     PDestRowStart,EDI               // EDI = PDest

        MOV     ESI,LLeft                       // ESI = PLeft

// If CurrentLimbRight = 0, we can skip a lot, and simply do a FillChar

        MOV     ECX,[EDX]                       // CurrentRightLimb
        XOR     EBX,EBX                         // PreviousProductHi
        ADD     PDestRowStart,CLimbSize
        ADD     PRight,CLimbSize
        MOV     EAX,LSize
        MOV     LCount,EAX

// The setup loop fills the row without an attempt to add to the data already in the result.

@SetupLoop:

        MOV     EAX,[ESI]
        MUL     EAX,ECX                         // Uses MUL EAX,ECX syntax because of bug in XE2 assembler.
        ADD     EAX,EBX
        ADC     EDX,0
        MOV     [EDI],EAX
        MOV     EBX,EDX
        LEA     ESI,[ESI + CLimbSize]
        LEA     EDI,[EDI + CLimbSize]
        DEC     LCount
        JNE     @SetupLoop
        MOV     [EDI],EDX

        MOV     EAX,LSize
        MOV     EDX,EAX
        SHR     EAX,CUnrollShift
        MOV     LSize,EAX
        AND     EDX,CUnrollMask
        MOV     LRest,EDX

        DEC     RSize
        JE      @Exit

// The outer loop iterates over the limbs of the shorter operand. After the setup loop, the lowest limb
// has already been taken care of.

@OuterLoop:

        MOV     ESI,LLeft
        MOV     EDI,PDestRowStart
        ADD     PDestRowStart,CLimbSize
        MOV     EAX,PRight
        ADD     PRight,CLimbSize

// If PRight^ is 0, then we can skip multiplication for the entire row.

        MOV     ECX,[EAX]
        TEST    ECX,ECX
        JE      @NextOuterLoop

        XOR     EBX,EBX
        MOV     EAX,LSize
        MOV     LCount,EAX
        CMP     EAX,0
        JE      @EndInnerLoop

@InnerLoop:

        // Loop unrolled. Approx. 70% faster than simple loop.

        MOV     EAX,[ESI]
        MUL     EAX,ECX
        ADD     EAX,[EDI]
        ADC     EDX,0
        ADD     EAX,EBX
        ADC     EDX,0
        MOV     [EDI],EAX
        MOV     EBX,EDX

        MOV     EAX,[ESI + CLimbSize]
        MUL     EAX,ECX
        ADD     EAX,[EDI + CLimbSize]
        ADC     EDX,0
        ADD     EAX,EBX
        ADC     EDX,0
        MOV     [EDI + CLimbSize],EAX
        MOV     EBX,EDX

        MOV     EAX,[ESI + 2*CLimbSize]
        MUL     EAX,ECX
        ADD     EAX,[EDI + 2*CLimbSize]
        ADC     EDX,0
        ADD     EAX,EBX
        ADC     EDX,0
        MOV     [EDI + 2*CLimbSize],EAX
        MOV     EBX,EDX

        MOV     EAX,[ESI + 3*CLimbSize]
        MUL     EAX,ECX
        ADD     EAX,[EDI + 3*CLimbSize]
        ADC     EDX,0
        ADD     EAX,EBX
        ADC     EDX,0
        MOV     [EDI + 3*CLimbSize],EAX
        MOV     EBX,EDX

        LEA     ESI,[ESI + 4*CLimbSize]
        LEA     EDI,[EDI + 4*CLimbSize]
        DEC     LCount
        JNE     @InnerLoop

@EndInnerLoop:

        // The restant limbs to be handled.

        MOV     EAX,LRest
        MOV     LCount,EAX
        CMP     EAX,0
        JE      @EndInnerRestLoop

@InnerRestLoop:

        MOV     EAX,[ESI]
        MUL     EAX,ECX
        ADD     EAX,EBX
        ADC     EDX,0
        ADD     EAX,[EDI]
        ADC     EDX,0
        MOV     [EDI],EAX
        MOV     EBX,EDX
        LEA     ESI,[ESI + CLimbSize]
        LEA     EDI,[EDI + CLimbSize]
        DEC     LCount
        JNE     @InnerRestLoop

@EndInnerRestLoop:

        // The last (left) limb gets the top of the 64 bit product.

        MOV     [EDI],EBX

@NextOuterLoop:

        DEC     RSize
        JNE     @OuterLoop

@Exit:
        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}

// This uses 64 bit multiplication as much as possible. The logic handles any odd (top) limbs especially.

var
  LeftOdd, RightOdd: Boolean;                   // Left, Right (resp.): odd number of limbs?
  SaveLeft: PLimb;
  LeftSize, RightSize: Integer;
asm
        .PUSHNV RSI
        .PUSHNV RDI
        .PUSHNV RBX

        MOV     EDI,RSize
        CMP     R9D,EDI
        JAE     @SwapEnd

        XCHG    RCX,RDX
        XCHG    R9D,EDI

@SwapEnd:

        MOV     SaveLeft,RCX

        MOV     EAX,R9D
        SHR     R9D,1
        MOV     LeftSize,R9D            // Number of double limbs of Left
        AND     AL,1
        MOV     LeftOdd,AL              // Does Left have an odd number of limbs?

        MOV     EAX,EDI
        SHR     EDI,1
        MOV     RightSize,EDI           // Number of double limbs of Right
        AND     AL,1
        MOV     RightOdd,AL             // Does Right have an odd number of limbs?

        MOV     R10,RDX                 // Current limb to be multiplied
        XOR     RBX,RBX                 // Top DWORD (EDX) of previous multiplication

        // If no more 64 bit limbs in Right, we must skip to final odd limb.

        CMP     RightSize,0
        JE      @FinalOddPart

        MOV     RCX,[R10]               // Current Right limb's value
        MOV     RDI,R8                  // Result limb pointer
        MOV     RSI,SaveLeft            // Left limb pointer
        ADD     R8,DLimbSize            // Result's pointer to start of current row
        ADD     R10,DLimbSize           // Current Right limb pointer

        MOV     R11D,LeftSize           // Loop counter
        CMP     R11D,0
        JE      @SetupOddPart

// Setup loop (64 bit part)

@SetupLoop64:

        MOV     RAX,[RSI]
        MUL     RCX
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI],RAX
        MOV     RBX,RDX
        LEA     RSI,[RSI + DLimbSize]
        LEA     RDI,[RDI + DLimbSize]
        DEC     R11D
        JNE     @SetupLoop64

// Setup loop, last limb ("odd" part).

@SetupOddPart:

        CMP     LeftOdd,0
        JE      @SkipSetupOddPart

        MOV     EAX,[RSI]               // 32 bit register to read odd limb of this loop
        MUL     RCX
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI],RAX
        MOV     [RDI + DLimbSize],RDX
        JMP     @SkipSkipSetupOddPart

@SkipSetupOddPart:

        MOV     [RDI],RDX

@SkipSkipSetupOddPart:

        DEC     RightSize
        JE      @FinalOddPart

@OuterLoop:

        MOV     RDI,R8
        ADD     R8,DLimbSize
        MOV     RCX,[R10]
        ADD     R10,DLimbSize

        TEST    RCX,RCX
        JE      @NextOuterLoop

        MOV     RSI,SaveLeft
        XOR     RBX,RBX
        MOV     R11D,LeftSize
        CMP     R11D,0
        JE      @InnerLoopOddPart

        SHR     R11D,CUnrollShift
        JE      @InnerTail64

@InnerLoop64:

        MOV     RAX,[RSI]               // Get double limb from Left data
        MUL     RCX                     // multiply it with current Right double limb's value --> RDX:RAX
        ADD     RAX,[RDI]               // Add current value in Result data
        ADC     RDX,0
        ADD     RAX,RBX                 // Add "carry", i.e. top double limb from previous multiplcation
        ADC     RDX,0
        MOV     [RDI],RAX               // Store in Result
        MOV     RBX,RDX                 // And save top double limb as "carry".

        MOV     RAX,[RSI + DLimbSize]
        MUL     RCX
        ADD     RAX,[RDI + DLimbSize]
        ADC     RDX,0
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI + DLimbSize],RAX
        MOV     RBX,RDX

        MOV     RAX,[RSI + 2*DLimbSize]
        MUL     RCX
        ADD     RAX,[RDI + 2*DLimbSize]
        ADC     RDX,0
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI + 2*DLimbSize],RAX
        MOV     RBX,RDX

        MOV     RAX,[RSI + 3*DLimbSize]
        MUL     RCX
        ADD     RAX,[RDI + 3*DLimbSize]
        ADC     RDX,0
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI + 3*DLimbSize],RAX
        MOV     RBX,RDX

        LEA     RSI,[RSI + 4*DLimbSize]
        LEA     RDI,[RDI + 4*DLimbSize]
        DEC     R11D
        JNE     @InnerLoop64

@InnerTail64:

        MOV     R11D,LeftSize
        AND     R11D,CUnrollMask
        JE      @InnerLoopOddPart

@InnerTailLoop64:

        MOV     RAX,[RSI]
        MUL     RCX
        ADD     RAX,[RDI]
        ADC     RDX,0
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI],RAX
        MOV     RBX,RDX
        LEA     RSI,[RSI + DLimbSize]
        LEA     RDI,[RDI + DLimbSize]
        DEC     R11D
        JNE     @InnerTailLoop64

@InnerLoopOddPart:

        CMP     LeftOdd,0               // If Left's size is odd, handle last limb.
        JE      @InnerLoopLastLimb

        MOV     RAX,[RSI]
        MUL     RCX
        ADD     RAX,[RDI]
        ADC     RDX,0
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI],RAX
        MOV     [RDI + DLimbSize],RDX
        JMP     @NextOuterLoop

@InnerLoopLastLimb:

        MOV     [RDI],RDX

@NextOuterLoop:

        DEC     RightSize
        JNE     @OuterLoop

@FinalOddPart:

        CMP     RightOdd,0
        JE      @Exit

        MOV     RDI,R8
        MOV     RSI,SaveLeft
        MOV     RAX,R10
        MOV     ECX,[RAX]                      // Right is odd, so read single TLimb
        XOR     RBX,RBX
        MOV     R11D,LeftSize
        CMP     R11D,0
        JE      @SkipFinalLoop

@FinalLoop:

        MOV     RAX,[RSI]
        MUL     RCX
        ADD     RAX,[RDI]
        ADC     RDX,0
        ADD     RAX,RBX
        ADC     RDX,0
        MOV     [RDI],RAX
        MOV     RBX,RDX
        LEA     ESI,[ESI + DLimbSize]
        LEA     EDI,[EDI + DLimbSize]
        DEC     R11D
        JNE     @FinalLoop

@SkipFinalLoop:

        CMP    LeftOdd,0
        JE     @LastLimb

        MOV    EAX,[RSI]
        MUL    RCX
        ADD    RAX,[RDI]
        ADC    RDX,0
        ADD    RAX,RBX
        ADC    RDX,0
        MOV    [RDI],RAX
        MOV    [RDI + DLimbSize],RDX
        JMP    @Exit

@LastLimb:

        MOV    [RDI],RDX

@Exit:

end;
{$ENDIF !WIN32}
{$ENDIF !PUREPASCAL}

function BigInteger.ToBinaryString: string;
begin
  Result := ToString(2);
end;

function BigInteger.ToByteArray: TArray<Byte>;
var
  Neg: TMagnitude;
  Bytes, Bits: Integer;
  ExtraByte: Byte;
begin
  if IsZero then
  begin
    SetLength(Result, 1);
    Result[0] := 0;
    Exit;
  end;

  Bytes := BitLength;
  Bits := Bytes and $07;
  Bytes := (Bytes + 7) shr 3;
  if FSize > 0 then
  begin
    Neg := FData;
    ExtraByte := $00;
  end
  else
  begin
    SetLength(Neg, Size);
    InternalNegate(PLimb(FData), PLimb(Neg), Size);
    ExtraByte := $FF;
  end;
  SetLength(Result, Bytes + Byte(Bits = 0));
  Move(Neg[0], Result[0], Bytes);
  if Bits = 0 then
    Result[Bytes] := ExtraByte;
end;

function BigInteger.ToDecimalString: string;
begin
  Result := ToString(10);
end;

function BigInteger.ToHexString: string;
begin
  Result := ToString(16);
end;

function BigInteger.ToOctalString: string;
begin
  Result := ToString(8);
end;

function BigInteger.ToString: string;
begin
  Result := ToString(FBase);
end;

function BigInteger.ToString(Base: Integer): string;
var
  P: PChar;
  LBuffer: TArray<Char>;
  LMagnitude: PLimb;
  LSize: Integer;
begin
  if not Base in [2..36] then
    Error(ecInvalidArgument);
  if FData = nil then
  begin
    Result := '0';
    Exit;
  end;
  LSize := FSize and SizeMask;
  SetLength(LBuffer, LSize * CStringMaxLengths[Base] + 1);
  LMagnitude := PLimb(System.Copy(FData));
  P := PChar(LBuffer) + Length(LBuffer);
  Dec(P);
  P^ := #0;
  while LSize > 0 do
  begin
    Dec(P);
    P^ := CBaseChars[InternalDivideByBase(LMagnitude, Base, LSize)];
  end;
  if FSize < 0 then
  begin
    Dec(P);
    P^ := '-';
  end;
  Result := P;
end;

// By default, uses FBase as numeric base, otherwise, if string "starts" with $, 0x, 0b or 0o, uses 16, 16 (both hex), 2 (binary) and 8 (octal) respectively.
class function BigInteger.TryParse(const S: string; out Res: BigInteger; aBase : Integer): Boolean;
var
  LTrimmed: string;
  LIsNegative: Boolean;
  P: PChar;
  LBase, LBaseNew: Integer;
begin
  Result := False;
  LTrimmed := UpperCase(Trim(S)); // Make string case insensitive.
  if LTrimmed = '' then
    Exit;
  LIsNegative := False;
  P := PChar(LTrimmed);
  if (P^ = '-') or (P^ = '+') then
  begin
    LIsNegative := (P^ = '-');
    Inc(P);
  end;
  LBase := aBase;               // By default, use global numeric base.
  case P^ of
    '$':                        // $ prefix indicates hexadecimal (equivalent to 0x and %16r)
      begin
        Inc(P);
        LBase := 16;
      end;
    '0':
      begin
        Inc(P);
        case P^ of
          #0:
            begin
              Res := Zero;
              Exit(True);
            end;
          'B':                  // 0b prefix indicates binary (equivalent to %2r)
            LBase := 2;
          'O', 'K':             // 0o17, 0k17 prefixes indicate octal (equivalent to %8r)
            LBase := 8;
          'X':                  // 0x prefix indicates hexadecimal (equivalent to $ and %16r)
            LBase := 16;
          'D':
            LBase := 10;
          else
            Dec(P);
        end;
        Inc(P);
      end;
    '%':                        // %nnr prefix indicates base n (nn is always decimal)
      begin
        Inc(P);
        LBaseNew := 0;
        while P^ <> 'R' do
        begin
          if P^ = #0 then
            Exit;
          LBaseNew := LBaseNew * 10 + Ord(P^) - CNumBase;
          Inc(P);
        end;
        Inc(P);
        if not (LBaseNew in [2..36]) then
          Exit;
        LBase := LBaseNew;
      end;
  end;
  Result := TryParse(P, LBase, Res);
  if Result and LIsNegative then
    Res := -Res;
end;

class function BigInteger.TryParse(const S: string; Base: TNumberBase; out Res: BigInteger): Boolean;
var
  LIsNegative: Boolean;
  LTrimmed: string;
  LVal: Integer;
  P: PChar;
begin
  Result := False;
  LTrimmed := Trim(S);
  if LTrimmed = '' then
    Exit;
  LIsNegative := False;
  Res.FSize := 0;
  Res.MakeSize(Length(S) div CStringMinLengths[Base] + 4);
  P := PChar(LTrimmed);
  if (P^ = '-') or (P^ = '+') then
  begin
    LIsNegative := (P^ = '-');
    Inc(P);
  end;
  while P^ <> #0 do
  begin
    if (P^ = '_') or (P^ = ' ') or (P = FormatSettings.ThousandSeparator) then
    begin
      Inc(P);
      Continue;
    end;
    LVal := Ord(P^);
    Inc(P);
    if LVal in [Ord('0')..Ord('9')] then
      Dec(LVal, CNumBase)
    else if LVal >= CAlphaBase then
    begin
      if LVal >= Ord('a') then
        Dec(LVal, 32);
      Dec(LVal, CAlphaBase - 10);
    end
    else
      Exit;
    if LVal >= Base then
      Exit;
    InternalMultiplyAndAdd(Res.FData, Base, LVal, Res.FData);
  end;
  if LIsNegative then
    Res := -Res;
  Result := True;
//  Res.Compact;
end;

class procedure BigInteger.Decimal;
begin
  FBase := 10;
end;

class function BigInteger.Divide(const Left: BigInteger; Right: UInt16): BigInteger;
var
  LSign: Integer;
begin
  if Right = 0 then
    Error(ecDivByZero);
  if Left.FData = nil then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;
  LSign := Left.FSize and SignMask;
  Result.MakeSize(Left.FSize and SizeMask);
  InternalDivMod16(PLimb(Left.FData), Right, PLImb(Result.FData), nil, Left.FSize and SizeMask);
  Result.Compact;
  if Result.FData <> nil then
    Result.FSize := (Result.FSize and SizeMask) or LSign;
end;

class function BigInteger.Divide(const Left: BigInteger; Right: UInt32): BigInteger;
var
  LSign: Integer;
begin
  if Right = 0 then
    Error(ecDivByZero);
  if Left.FData = nil then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;
  LSign := Left.FSize and SignMask;
  Result.MakeSize(Left.FSize and SizeMask);
  InternalDivMod32(PLimb(Left.FData), Right, PLimb(Result.FData), nil, Left.FSize and SizeMask);
  Result.Compact;
  if Result.FData <> nil then
    Result.FSize := (Result.FSize and SizeMask) or LSign;
end;

class function BigInteger.Divide(const Left, Right: BigInteger): BigInteger;
var
  Sign, LSize, RSize: Integer;
  Remainder: BigInteger;
begin
  if Right.FData = nil then
    Error(ecDivByZero);

  Sign := (Left.FSize and SignMask) xor (Right.FSize and SignMask);
  LSize := Left.FSize and SizeMask;
  RSize := Right.FSize and SizeMask;

  case InternalCompare(PLimb(Left.FData), PLimb(Right.FData), LSize, RSize) of
    -1:
      begin
        ShallowCopy(Zero, Result);
      end;
    0:
      begin
        if Sign = 0 then
          ShallowCopy(One, Result)
        else
          ShallowCopy(MinusOne, Result);
      end;
    else
      begin
        if ShouldUseBurnikelZiegler(LSize, RSize) then
          DivModBurnikelZiegler(Left, Right, Result, Remainder)
        else
          DivModKnuth(Left, Right, Result, Remainder);

        if Result.FSize <> 0 then
          Result.FSize := (Result.FSize and SizeMask) or Sign;
      end;
  end;
end;

function BigInteger.Divide(const Other: BigInteger): PBigInteger;
begin
  Result := @Self;
  Self := Self div Other;
end;

class procedure BigInteger.DivMod(const Dividend, Divisor: BigInteger; var Quotient, Remainder: BigInteger);
var
  LSign, RSign: Integer;
  LSize, RSize: Integer;
begin
  if Divisor.FData = nil then
    Error(ecDivByZero);

  LSign := SignBitOf(Dividend.FSize);
  RSign := SignBitOf(Divisor.FSize);
  LSize := Dividend.FSize and SizeMask;
  RSize := Divisor.FSize and SizeMask;

  case InternalCompare(PLimb(Dividend.FData), PLimb(Divisor.FData), LSize, RSize) of
    -1:
      begin
        ShallowCopy(Dividend, Remainder);
        ShallowCopy(Zero, Quotient);
        Exit;
      end;
    0:
      begin
        ShallowCopy(Zero, Remainder);
        if LSign = RSign then
          ShallowCopy(One, Quotient)
        else
          ShallowCopy(MinusOne, Quotient);
        Exit;
      end
    else
      begin
        if ShouldUseBurnikelZiegler(LSize, RSize) then
          DivModBurnikelZiegler(Dividend, Divisor, Quotient, Remainder)
        else
          DivModKnuth(Dividend, Divisor, Quotient, Remainder);

//        if Quotient.FSize <> 0 then
//          Quotient.FSize := (Quotient.FSize and SizeMask) or (LSign xor RSign);
//        if Remainder.FSize <> 0 then
//          Remainder.FSize := (Remainder.FSize and SizeMask) or LSign;
      end;
  end;
end;

class procedure BigInteger.DivModKnuth(const Left, Right: BigInteger; var Quotient, Remainder: BigInteger);
var
  LSign, RSign: Integer;
  LSize, RSize: Integer;
begin
  if Right.FData = nil then
    Error(ecDivByZero);

  LSign := SignBitOf(Left.FSize);
  RSign := SignBitOf(Right.FSize);
  LSize := Left.FSize and SizeMask;
  RSize := Right.FSize and SizeMask;

  case InternalCompare(PLimb(Left.FData), PLimb(Right.FData), LSize, RSize) of
    -1:
      begin
        ShallowCopy(Left, Remainder);
        ShallowCopy(Zero, Quotient);
        Exit;
      end;
    0:
      begin
        ShallowCopy(Zero, Remainder);
        if LSign = RSign then
          ShallowCopy(One, Quotient)
        else
          ShallowCopy(MinusOne, Quotient);
        Exit;
      end
    else
      begin
        Quotient.MakeSize(LSize - RSize + 1);
        Remainder.MakeSize(RSize);
        if not InternalDivMod(PLimb(Left.FData), PLimb(Right.FData), PLimb(Quotient.FData), PLimb(Remainder.FData), LSize, RSize) then
          Error(ecInvalidArg);
        Quotient.Compact;
        Remainder.Compact;

        if Quotient.FSize <> 0 then
          Quotient.FSize := (Quotient.FSize and SizeMask) or (LSign xor RSign);
        if Remainder.FSize <> 0 then
          Remainder.FSize := (Remainder.FSize and SizeMask) or LSign;
      end;
  end;
end;

class procedure BigInteger.InternalShiftLeft(Source, Dest: PLimb; Shift, Size: Integer);
{$IF DEFINED(PUREPASCAL)}
var
  I: Integer;
begin
  Shift := Shift and 31;
  if Shift = 0 then
    CopyLimbs(Source, Dest, Size)
  else
  begin
    Dest[Size] := Source[Size - 1] shr (CLimbBits - Shift);
    for I := Size - 1 downto 1 do
      Dest[I] := (Source[I] shl Shift) or (Source[I - 1] shr (CLimbBits - Shift));
    Dest[0] := Source[0] shl Shift;
  end;
end;
{$ELSEIF DEFINED(WIN32)}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX
        MOV     EDI,EDX

        // No need to test for nil.
        MOV     EBX,Size

        MOV     EAX,[ESI + CLimbSize*EBX]
        DEC     EBX
        JS      @LoopEnd

@ShiftLoop:

        MOV     EDX,[ESI + CLimbSize*EBX]
        SHLD    EAX,EDX,CL
        MOV     [EDI + CLimbSize*EBX + CLimbSize],EAX
        MOV     EAX,EDX

@ShiftStart:

        DEC     EBX
        JNS     @ShiftLoop

@LoopEnd:

        SHL     EAX,CL
        MOV     [EDI],EAX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE}
asm
        XCHG    RCX,R8
        MOV     R10,RDX

        MOV     EAX,[R8 + CLimbSize*R9]
        DEC     R9D
        JS      @LoopEnd

@ShiftLoop:

        MOV     EDX,[R8 + CLimbSize*R9]
        SHLD    EAX,EDX,CL
        MOV     [R10 + CLimbSize*R9 + CLimbSize],EAX
        MOV     EAX,EDX

@ShiftStart:

        DEC     R9D
        JNS     @ShiftLoop

@LoopEnd:

        SHL     EAX,CL
        MOV     [R10],EAX

@Exit:
end;
{$IFEND}

class procedure BigInteger.InternalShiftRight(Source, Dest: PLimb; Shift, Size: Integer);
{$IF DEFINED(PUREPASCAL)}
var
  I: Integer;
begin
  Shift := Shift and 31;
  if Shift = 0 then
    CopyLimbs(Source, Dest, Size)
  else
  begin
    for I := 0 to Size - 1 do
      Dest[I] := (Source[I] shr Shift) or (Source[I + 1] shl (CLimbBits - Shift));
    Dest[Size - 1] := Source[Size - 1] shr Shift;
  end;
end;
{$ELSEIF DEFINED(WIN32)}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX
        MOV     EDI,EDX
        MOV     EBX,Size
        MOV     EAX,[ESI]
        LEA     ESI,[ESI + CLimbSize]
        DEC     EBX
        JE      @EndLoop

@ShiftLoop:

        MOV     EDX,[ESI]
        SHRD    EAX,EDX,CL
        MOV     [EDI],EAX
        MOV     EAX,EDX
        LEA     ESI,[ESI + CLimbSize]
        LEA     EDI,[EDI + CLimbSize]
        DEC     EBX
        JNE     @ShiftLoop

@EndLoop:

        SHR     EAX,CL
        MOV     [EDI],EAX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE}
asm
        XCHG    RCX,R8                        // R8 = source, ECX = shift

        MOV     EAX,[R8]
        LEA     R8,[R8 + CLimbSize]
        DEC     R9D
        JE      @LoopEnd

@ShiftLoop:

        MOV     R10D,[R8]
        SHRD    EAX,R10D,CL
        MOV     [RDX],EAX
        MOV     EAX,R10D
        LEA     RDX,[RDX + CLimbSize]
        LEA     R8,[R8 + CLimbSize]
        DEC     R9D
        JNE     @ShiftLoop

@LoopEnd:

        SHR     EAX,CL
        MOV     [RDX],EAX

@Exit:

end;
{$IFEND}

type
{$IFDEF CPUX64}
  TDivLimb = UInt32;
  TDblLimb = UInt64;
{$ELSE}
  TDivLimb = UInt16;
  TDblLimb = UInt32;
{$ENDIF}
  PDivLimb = ^TDivLimb;
  PDblLimb = ^TDblLimb;

const
  CDivLimbBase = TDblLimb(High(TDivLimb)) + 1;
  CDivLimbBits = SizeOf(TDivLimb) * 8;
  CDblLimbBits = SizeOf(TDblLimb) * 8;

class function BigInteger.InternalDivMod16(Dividend: PLimb; Divisor: UInt16; Quotient, Remainder: PLimb; LSize: Integer): Boolean;
{$IFDEF PUREPASCAL}
// In PUREPASCAL, using 16-bit division with an intermediate 32-bit result turned out to be faster than
// 32-bit division with an intermediate 64-bit result.
type
  PUInt16 = ^UInt16;
var
  J: Integer;
  LRemainder: UInt16;
begin
  LSize := LSize + LSize;

  LRemainder := 0;
  for J := LSize - 1 downto 0 do
    Math.DivMod(Cardinal(LRemainder shl 16 + PUInt16(Dividend)[J]), Divisor, PUInt16(Quotient)[J], LRemainder);

  if Remainder <> nil then
    Remainder[0] := LRemainder;
  Exit(True);
end;
{$ELSE !PUREPASCAL}
// In assembler, 32 bit division is faster, so promote divisor to 32 bit and use InternalDivMod32.
begin
  Result := InternalDivMod32(Dividend, UInt32(Divisor), Quotient, Remainder, LSize);
end;
{$ENDIF !PUREPASCAL}

class function BigInteger.InternalDivMod32(Dividend: PLimb; Divisor: UInt32; Quotient, Remainder: PLimb; LSize: Integer): Boolean;
{$IFDEF PUREPASCAL}
{$IFDEF CPUX86}
begin
  // In 32PP, plain division using System.Math.DivMod(UInt64, ...) is much slower than this:
  Result := InternalDivMod(Dividend, @Divisor, Quotient, Remainder, LSize, 1);
end;
{$ELSE CPUX64}
var
  J: Integer;
  LQuotient, LRemainder: UInt64;
begin
  LRemainder := 0;
  for J := LSize - 1 downto 0 do
  begin
    // DivMod(UInt64, UInt64, var UInt64, var UInt64)
    DivMod64(LRemainder * (UInt64(High(UInt32)) + 1) + Dividend[J], Divisor, LQuotient, LRemainder);
    Quotient[J] := TLimb(LQuotient);
  end;
  if Remainder <> nil then
    Remainder[0] := TLimb(LRemainder);
  Exit(True);
end;
{$ENDIF CPUX64}
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     EBX,EDX

        MOV     EDI,LSize
        LEA     ESI,[EAX + CLimbSize*EDI - CLimbSize]
        LEA     ECX,[ECX + CLimbSize*EDI - CLimbSize]
        XOR     EDX,EDX

        SHR     EDI,CUnrollShift
        JE      @Tail

@DivLoop:

        MOV     EAX,[ESI]
        DIV     EAX,EBX
        MOV     [ECX],EAX
        MOV     EAX,[ESI - CLimbSize]
        DIV     EAX,EBX
        MOV     [ECX - CLimbSize],EAX
        MOV     EAX,[ESI - 2 * CLimbSize]
        DIV     EAX,EBX
        MOV     [ECX - 2 * CLimbSize],EAX
        MOV     EAX,[ESI - 3 * CLimbSize]
        DIV     EAX,EBX
        MOV     [ECX - 3 * CLimbSize],EAX
        LEA     ESI,[ESI - 4 * CLimbSize]
        LEA     ECX,[ECX - 4 * CLimbSize]
        DEC     EDI
        JNE     @DivLoop

@Tail:

        MOV     EDI,LSize
        AND     EDI,CUnrollMask
        JE      @StoreRemainder

@TailLoop:

        MOV     EAX,[ESI]
        DIV     EAX,EBX
        MOV     [ECX],EAX
        LEA     ESI,[ESI - CLimbSize]
        LEA     ECX,[ECX - CLimbSize]
        DEC     EDI
        JNE     @TailLoop

@StoreRemainder:

        MOV     EBX,Remainder
        OR      EBX,EBX
        JE      @Exit

        MOV     [EBX],EDX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI

end;
{$ELSE WIN64}
asm
        MOV     R10D,EDX

        MOV     R11D,LSize
        LEA     RCX,[RCX + R11*CLimbSize]
        LEA     R8,[R8 + R11*CLimbSize]
        XOR     EDX,EDX

        SHR     R11D,CUnrollShift
        JE      @Tail

@DivLoop:

        // Note: 64 bit division turned out to be considerably slower!

        MOV     EAX,[RCX - CLimbSize]
        DIV     EAX,R10D                        // Uses DIV EAX,R10D syntax because of bug in XE 64 bit assembler.
        MOV     [R8 - CLimbSize],EAX

        MOV     EAX,[RCX - 2 * CLimbSize]
        DIV     EAX,R10D
        MOV     [R8 - 2 * CLimbSize],EAX

        MOV     EAX,[RCX - 3 * CLimbSize]
        DIV     EAX,R10D
        MOV     [R8 - 3 * CLimbSize],EAX

        MOV     EAX,[RCX - 4 * CLimbSize]
        DIV     EAX,R10D
        MOV     [R8 - 4 * CLimbSize],EAX

        LEA     RCX,[RCX - 4 * CLimbSize]
        LEA     R8,[R8 - 4 * CLimbSize]
        DEC     R11D
        JNE     @DivLoop

@Tail:

        MOV     R11D,LSize
        AND     R11D,CUnrollMask
        JE      @StoreRemainder

@TailLoop:

        MOV     EAX,[RCX - ClimbSize]
        DIV     EAX,R10D
        MOV     [R8 - CLimbSize],EAX
        LEA     RCX,[RCX - CLimbSize]
        LEA     R8,[R8 - CLimbSize]
        DEC     R11D
        JNE     @TailLoop

@StoreRemainder:

        OR      R9,R9
        JE      @Exit
        MOV     [R9],EDX

@Exit:

end;
{$ENDIF}
{$ENDIF PUREPASCAL}

class function BigInteger.InternalDivMod(Dividend, Divisor, Quotient, Remainder: PLimb; LSize, RSize: Integer): Boolean;

// Basecase division, see Knuth TAOCP, Vol. 2.

{$IF DEFINED(PUREPASCAL)}
var
  PDividend, PDivisor, PQuotient, PRemainder: PDivLimb;
  NormDividend, NormDivisor: TArray<TDivLimb>;          // Normalized dividend and divisor
  QHat: TDblLimb;                                       // Estimate quotient limb
  RHat: TDblLimb;                                       // Remainder after calculating QHat
  Product: TDblLimb;                                    // Product of limb and QHat
  Shift, RevShift, I, J: Integer;                       // Help variables
  NormDividendTop2, NormDivisorTop: TDblLimb;
{$IF SizeOf(TDivLimb) = SizeOf(TLimb)}
  Rem, Quot: UInt64;
  Carry, Value: Int64;
{$ELSE}
//  Rem: TDivLimb;
  Carry, Value: Integer;
{$IFEND}
begin

  Assert(SizeOf(TDblLimb) = 2 * SizeOf(TDivLimb));
  PDividend := PDivLimb(Dividend);
  PDivisor := PDivLimb(Divisor);
  PQuotient := PDivLimb(Quotient);
  PRemainder := PDivLimb(Remainder);

{$IF SizeOf(TLimb) > SizeOf(TDivLimb)}
  LSize := LSize + LSize;
  RSize := RSize + RSize;

  if PDivisor[RSize - 1] = 0 then
    Dec(RSize);
{$IFEND}

  // NOTE: In Win32, this uses 16-bit division (with 32-bit intermediate results) to avoid having to use
  //       64-bit unsigned integers. This turned out to be (approx. 17%) faster than using 32-bit limbs.
  //       In Win64, this uses 32-bit division with 64-bit intermediate results.

  if (LSize < RSize) then
    Exit(False);

  while (RSize > 0) and (PDivisor[RSize - 1] = 0) do
    Dec(RSize);
  if RSize = 0 then
    Exit(False);

  while (LSize > 0) and (PDividend[LSize - 1] = 0) do
    Dec(LSize);


  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
  /// Perhaps it makes sense to shift away common trailing zeroes, if divisor > certain size.              ///
  /// Shifting should be pretty simple: simply remove any common zeroes in both dividend and divisor,      ///
  /// generate an offset to the lowest non-zero limb and shift accordingly (when normalizing).             ///
  /// Note that the remainder must be amended accordingly.                                                 ///
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////


  if RSize = 1 then
  begin

    // Handle single-digit divisor.

  {$IF SizeOf(TDivLimb) = SizeOf(TLimb)}
    Exit(InternalDivMod32(Dividend, PDivisor[0], Quotient, Remainder, LSize));
  {$ELSE}
    Exit(InternalDivMod16(Dividend, PDivisor[0], Quotient, Remainder, (LSize + 1) div 2));
  {$IFEND}

//    Rem := 0;
//    for J := LSize - 1 downto 0 do
//  {$IF SizeOf(TDivLimb) = SizeOf(TLimb)}
//    begin
//      // DivMod(UInt64, UInt64, var UInt64, var UInt64)
//      System.Math.DivMod(Rem * CDivLimbBase + PDividend[J], PDivisor[0], Quot, Rem);
//      PQuotient[J] := TDivLimb(Quot);
//    end;
//  {$ELSE}
//      // DivMod(Cardinal, Word, var Word, var Word)
//      System.Math.DivMod(Cardinal(Rem * CDivLimbBase + PDividend[J]), PDivisor[0], PQuotient[J], Rem);
//  {$IFEND}
//    if PRemainder <> nil then
//      PRemainder[0] := TDivLimb(Rem);
//    Exit(True);
  end;
  // Normalize by shifting divisor left just enough so that its high-order bit is set, and shift dividend left the
  // same amount. A high-order digit is prepended to dividend unconditionally.

  // Get number of leading zeroes.
  Shift := Velthuis.Numerics.NumberOfleadingZeros(PDivisor[RSize - 1]);            // 0 <= Shift < Bits.
  RevShift := CDivLimbBits - Shift;

  // Normalize divisor and shift dividend left accordingly.
  SetLength(NormDivisor, RSize);
  SetLength(NormDividend, LSize + 1);
  if Shift > 0 then
  begin
    for I := RSize - 1 downto 1 do
      NormDivisor[I] := (PDivisor[I] shl Shift) or (PDivisor[I - 1] shr RevShift);
    NormDivisor[0] := PDivisor[0] shl Shift;

    NormDividend[LSize] := PDividend[LSize - 1] shr RevShift;
    for I := LSize - 1 downto 1 do
      NormDividend[I] := (PDividend[I] shl Shift) or (PDividend[I - 1] shr RevShift);
    NormDividend[0] := PDividend[0] shl Shift;
  end
  else
  begin
    // SizeOf(TDivLimb) is not always SizeOf(TLimb), so don't use MoveLimbs() here.
    Move(PDivisor[0], NormDivisor[0], RSize * SizeOf(TDivLimb));
    Move(PDividend[0], NormDividend[0], LSize * SizeOf(TDivLimb));
  end;

  // Knuth's basecase algorithm.

  // Main loop.
  for J := LSize - RSize downto 0 do
  begin
    NormDivisorTop := NormDivisor[RSize - 1];
    NormDividendTop2 := PDblLimb(@NormDividend[J + RSize - 1])^;

    // QHat -- q^ in TAOCP -- is (first) estimate of Quotient[J]
    QHat := NormDividendTop2 div NormDivisorTop;

    // RHat -- r^ in TAOCP -- is remainder belonging to q^.
    RHat := NormDividendTop2 - QHat * NormDivisorTop;

    while (QHat * NormDivisor[RSize - 2] > RHat shl CDivLimbBits + NormDividend[J + RSize - 2]) or
          (QHat >= CDivLimbBase) do
    begin
      Dec(QHat);
      Inc(RHat, NormDivisorTop);

      if RHat >= CDivLimbBase then
        Break;
    end;

    // Multiply and subtract.
    Carry := 0;
    for I := 0 to RSize - 1 do
    begin
      Product := QHat * NormDivisor[I];
      Value := NormDividend[I + J] - Carry - TDivLimb(Product);
      NormDividend[I + J] := TDivLimb(Value);
    {$IF SizeOf(TLimb) = SizeOf(TDivLimb)}
      // Integer cast to force sign-extension of 'Value shr Bits'
      Carry := Int64(Product shr CDivLimbBits) - Integer(Value shr CDivLimbBits);
    {$ELSE}
      // Smallint cast to force sign-extension of 'Value shr Bits'
      Carry := Integer(Product shr CDivLimbBits) - Smallint(Value shr CDivLimbBits);
    {$IFEND}
    end;
    Value := NormDividend[J + RSize] - Carry;
    NormDividend[J + RSize] := Value;

    if Value < 0 then
    begin

      // If too much was subtracted, add back.
      Dec(QHat);
      Value := 0;
      for I := 0 to RSize - 1 do
      begin
        Value := NormDividend[I + J] + NormDivisor[I] + Value shr CDivLimbBits;
        NormDividend[I + J] := TDivLimb(Value);
      end;
      Inc(NormDividend[J + RSize], Value shr CDivLimbBits);
    end;

    PQuotient[J] := QHat;
  end;

  // If the caller wants the remainder, unnormalize it and pass it back.
  if PRemainder <> nil then
    if Shift <> 0 then
      for I := 0 to RSize - 1 do
        PRemainder[I] := (TDblLimb(NormDividend[I]) shr Shift) or (TDblLimb(NormDividend[I + 1]) shl RevShift)
    else
      for I := 0 to RSize - 1 do
        PRemainder[I] := NormDividend[I];

  Result := True;
end;
{$ELSEIF DEFINED(WIN32)}
var
  LDividend, LDivisor, LQuotient: PLimb;                // Local copies of passed registers
  NormDividend, NormDivisor: PLimb;                     // Manually managed dynamic arrays
  QHat, RHat, Product: TUInt64;                         // 64 bit intermediate results
  Overflow: TLimb;                                      // "Carry" between multiplications
  Shift: Integer;                                       // Normalization shift
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        // To avoid reference count problems with Delphi's dynamic array types, we do our own,
        // "old school" dynarrays, using GetMem and FreeMem.

        XOR     EBX,EBX                         // Set "dynarrays" to nil, so the FreeMem calls won't fail.
        MOV     NormDividend,EBX
        MOV     NormDivisor,EBX

        MOV     LDividend,EAX
        MOV     LDivisor,EDX
        MOV     LQuotient,ECX

        MOV     ESI,LSize
        MOV     EDI,RSize
        CMP     ESI,EDI
        JL      @ExitFalse

        DEC     EDI
        JS      @ExitFalse
        JNE     @MultiLimbDivisor

// Simple division
//   Divisor only contains one single limb: simple division and exit.

@SingleLimbDivisor:

        MOV     EBX,[EDX]
        DEC     ESI
        MOV     EDI,EAX
        XOR     EDX,EDX

@SingleDivLoop:

        MOV     EAX,[EDI + CLimbSize*ESI]
        DIV     EAX,EBX
        MOV     [ECX + CLimbSize*ESI],EAX
        DEC     ESI
        JNS     @SingleDivLoop
        MOV     EAX,Remainder
        TEST    EAX,EAX
        JZ      @ExitTrue
        MOV     [EAX],EDX
        JMP     @ExitTrue

// Multilimb division
//   Divisor contains more than one limb: basecase division as described in Knuth's TAoCP.

@MultiLimbDivisor:

        MOV     EAX,RSize                  // GetMem(NormDivisor, RSize * CLimbSize);
        LEA     EAX,[EAX*CLimbSize]
        CALL    System.AllocMem
        MOV     NormDivisor,EAX

        MOV     EAX,LSize                 // GetMem(NormDividend, (LSize + 1) * CLimbSize);
        INC     EAX
        LEA     EAX,[EAX*CLimbSize]
        CALL    System.AllocMem
        MOV     NormDividend,EAX

// First: normalize Divisor by shifting left to eliminate leading zeroes
//        and shift Dividend left by same number of bits.

        // Get number of leading Divisor zeros (into ECX).

        MOV     ESI,LDivisor
        MOV     EBX,[ESI+CLimbSize*EDI]
        BSR     EBX,EBX
        MOV     ECX,31
        SUB     ECX,EBX
        MOV     Shift,ECX

        // Shift Divisor to NormDivisor by CL.

        MOV     EBX,EDI
        MOV     EDI,NormDivisor
        MOV     EAX,[ESI + CLimbSize*EBX]
        JMP     @ShiftDivisor

@ShiftDivisorLoop:

        MOV     EDX,[ESI + CLimbSize*EBX]
        SHLD    EAX,EDX,CL
        MOV     [EDI + CLimbSize*EBX + CLimbSize],EAX
        MOV     EAX,EDX

@ShiftDivisor:

        DEC     EBX
        JNS     @ShiftDivisorLoop

        // Handle lowest limb.

        SHL     EAX,CL
        MOV     [EDI],EAX

        // Shift Dividend to NormDividend by CL.

        MOV     EBX,LSize
        MOV     ESI,LDividend
        MOV     EDI,NormDividend
        XOR     EAX,EAX
        JMP     @ShiftDividend

@ShiftDividendLoop:

        MOV     EDX,[ESI + CLimbSize*EBX]
        SHLD    EAX,EDX,CL
        MOV     [EDI + CLimbSize*EBX + CLimbSize],EAX
        MOV     EAX,EDX

@ShiftDividend:

        DEC     EBX
        JNS     @ShiftDividendLoop

        // Handle lowest limb.

        SHL     EAX,CL
        MOV     [EDI],EAX

        MOV     EBX,LSize
        MOV     ECX,RSize

        MOV     ESI,NormDividend
        MOV     EDI,NormDivisor
        LEA     EDI,[EDI + CLimbSize*ECX - CLimbSize]

@MainLoop:

        XOR     EDX,EDX
        MOV     EAX,[ESI + CLimbSize*EBX]
        DIV     EAX,[EDI]
        MOV     QHat.Hi,EAX
        MOV     EAX,[ESI + CLimbSize*EBX - CLimbSize]
        DIV     EAX,[EDI]
        MOV     QHat.Lo,EAX
        MOV     RHat.Lo,EDX
        XOR     EDX,EDX
        MOV     RHat.Hi,EDX

@CheckAdjust:

        CMP     QHat.Hi,0
        JNE     @DoAdjust
        MOV     EAX,QHat.Lo
        MUL     EAX,[EDI - CLimbSize]

        CMP     EDX,RHat.Lo
        JA      @DoAdjust
        JB      @AdjustEnd
        CMP     EAX,[ESI + CLimbSize*EBX - 2*CLimbSize]
        JBE     @AdjustEnd

@DoAdjust:

        SUB     QHat.Lo,1
        SBB     QHat.Hi,0
        MOV     EAX,[EDI]
        ADD     RHat.Lo,EAX
        ADC     RHat.Hi,0
        JZ      @CheckAdjust

@AdjustEnd:

        // Now multiply NormDivisor by QHat and subtract the product from NormDividend[J].

        // Save a few registers.

        PUSH    EDI
        PUSH    EBX
        PUSH    ECX

        MOV     ECX,EBX
        SUB     ECX,RSize
        LEA     EDI,[ESI + CLimbSize*ECX]
        MOV     EAX,LQuotient
        MOV     EDX,QHat.Lo
        MOV     [EAX + CLimbSize*ECX],EDX
        XOR     EBX,EBX
        MOV     Overflow,EBX

@SubtractProduct:

        MOV     EAX,NormDivisor
        MOV     EAX,[EAX + CLimbSize*EBX]
        MUL     EAX,QHat.Lo
        MOV     Product.Lo,EAX
        MOV     Product.Hi,EDX
        XOR     EDX,EDX
        MOV     EAX,[EDI + CLimbSize*EBX]
        SUB     EAX,Overflow
        SBB     EDX,0
        SUB     EAX,Product.Lo
        SBB     EDX,0
        MOV     [EDI + CLimbSize*EBX],EAX
        MOV     EAX,Product.Hi
        SUB     EAX,EDX
        MOV     Overflow,EAX
        INC     EBX
        CMP     EBX,RSize
        JL      @SubtractProduct

@SubtractProductEnd:

        MOV     EBX,[ESP + 4]
        MOV     EDX,[ESI + CLimbSize*EBX]
        SUB     EDX,Overflow
        MOV     [ESI + CLimbSize*EBX],EDX
        JNC     @SkipAddBack

        // Add normalized divisor back, if necessary:

        MOV     EAX,LQuotient
        DEC     [EAX + CLimbSize*ECX]
        XOR     EBX,EBX
        MOV     Overflow,EBX

@AddBackLoop:

        CMP     EBX,RSize
        JGE     @AddBackLoopEnd
        XOR     EDX,EDX
        MOV     EAX,NormDivisor
        MOV     EAX,[EAX + CLimbSize*EBX]
        ADD     EAX,Overflow
        ADD     [EDI + CLimbSize*EBX],EAX
        ADC     EDX,0
        MOV     Overflow,EDX
        INC     EBX
        JMP     @AddBackLoop

@AddBackLoopEnd:

        MOV     EBX,[ESP + 4]
        ADD     [ESI + CLimbSize*EBX],EDX

@SkipAddBack:

        POP     ECX
        POP     EBX
        POP     EDI

        // End of main loop; loop if required.

        DEC     EBX
        CMP     EBX,ECX
        JGE      @MainLoop

        // NormDividend now contains remainder, scaled by Shift.
        // If Remainder <> nil, then shift NormDividend down into Remainder.

        MOV     EAX,Remainder
        TEST    EAX,EAX
        JE      @ExitTrue
        XOR     EBX,EBX
        MOV     ESI,NormDividend
        MOV     EDI,EAX
        MOV     ECX,Shift
        MOV     EAX,[ESI + CLimbSize*EBX]

@RemainderLoop:

        MOV     EDX,[ESI + CLimbSize*EBX + CLimbSize]
        SHRD    EAX,EDX,CL
        MOV     [EDI + CLimbSize*EBX],EAX
        MOV     EAX,EDX
        INC     EBX
        CMP     EBX,RSize
        JL      @RemainderLoop
        SHR     EDX,CL
        MOV     [EDI + CLimbSize*EBX],EDX
        JMP     @ExitTrue

@ExitFalse:

        MOV     BL,0
        JMP     @Exit

@ExitTrue:

        MOV     BL,1

@Exit:

        // Clear dynamic arrays.

        MOV     EAX,NormDividend
        CALL    System.@FreeMem

        MOV     EAX,NormDivisor
        CALL    System.@FreeMem

        MOV     EAX,EBX

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE}
var
  LDividend, LDivisor, LQuotient, LRemainder: PLimb;
  NormDividend, NormDivisor: PLimb;
  QHat, RHat, Product: TUInt64;
  Overflow: TLimb;
  Shift: Integer;
  SaveRDI, SaveRBX, SaveRCX: NativeUInt;
asm
        .PUSHNV RSI
        .PUSHNV RDI
        .PUSHNV RBX

        // To avoid reference count problems with Delphi's dynamic array types, we do or own,
        // "old school" dynarrays, using GetMem and FreeMem.

        XOR     EBX,EBX                 // Set "dynarrays" to nil, so FreeMem calls won't fail.
        MOV     NormDividend,RBX
        MOV     NormDivisor,RBX

        MOV     LDividend,RCX
        MOV     LDivisor,RDX
        MOV     LQuotient,R8
        MOV     LRemainder,R9

        MOV     ESI,LSize
        MOV     EDI,RSize
        CMP     ESI,EDI
        JL      @ExitFalse

        DEC     EDI
        JS      @ExitFalse
        JNE     @MultiLimbDivisor

// Simple division
//   Divisor only contains one single limb: simple division and exit.
//   NOTE: 32 bit division is easier and probably faster than 64 bit, even in 64 bit mode. This was tested for Decimals.pas.

@SingleLimbDivisor:

        MOV     EBX,[RDX]

        DEC     ESI
        MOV     RDI,RCX
        XOR     EDX,EDX

@SingleDivLoop:

        MOV     EAX,[RDI + CLimbSize*RSI]

        // ---------------------------------------------------------------------------------------------------------------------- //
        // NOTE: In XE2, in 64 bit asm, "DIV <r/m32>" is generated as "DIV <r/m64>", but "DIV EAX,<r/m32>" is generated correctly. //
        //       The same applies to "MUL <r/m32>".                                                                               //
        // ---------------------------------------------------------------------------------------------------------------------- //

        DIV     EAX,EBX
        MOV     [R8 + CLimbSize*RSI],EAX
        DEC     ESI
        JNS     @SingleDivLoop
        MOV     RAX,LRemainder
        TEST    RAX,RAX
        JZ      @ExitTrue
        MOV     [RAX],EDX
        JMP     @ExitTrue

// MultiLimb division
//   Divisor contains more than one limb: basecase division as described in Knuth's TAoCP Vol. 2.

@MultiLimbDivisor:

        MOV     ECX,RSize
        ADD     ECX,ECX
        ADD     ECX,ECX
        CALL    System.AllocMem
        MOV     NormDivisor,RAX

        MOV     ECX,LSize
        INC     ECX
        ADD     ECX,ECX
        ADD     ECX,ECX
        CALL    System.AllocMem
        MOV     NormDividend,RAX

// First: normalize Divisor by shifting left to eliminate leading zeroes
//        and shift Dividend left by same nubmer of bits.

        // Get number of leading Divisor zeroes (into ECX).

        MOV     RSI,LDivisor
        MOV     EBX,[RSI + CLimbSize*RDI]
        BSR     EBX,EBX
        MOV     ECX,31
        SUB     ECX,EBX
        MOV     Shift,ECX

        // Shift Divisor to NormDivisor by CL.

        MOV     EBX,EDI
        MOV     RDI,NormDivisor
        MOV     EAX,[RSI + CLimbSize*RBX]
        JMP     @ShiftDivisor

@ShiftDivisorLoop:

        MOV     EDX,[RSI + CLimbSize*RBX]
        SHLD    EAX,EDX,CL
        MOV     [RDI + CLimbSize*RBX + CLimbSize],EAX
        MOV     EAX,EDX

@ShiftDivisor:

        DEC     EBX
        JNS     @ShiftDivisorLoop

        // Handle lowest limb.

        SHL     EAX,CL
        MOV     [RDI],EAX

        // Shift Dividend to NormDividend by CL.

        MOV     EBX,LSize
        MOV     RSI,LDividend
        MOV     RDI,NormDividend
        XOR     EAX,EAX
        JMP     @ShiftDividend

@ShiftDividendLoop:

        MOV     EDX,[RSI + CLimbSize*RBX]
        SHLD    EAX,EDX,CL
        MOV     [RDI + CLimbSize*RBX + CLimbSize],EAX
        MOV     EAX,EDX

@ShiftDividend:

        DEC     EBX
        JNS     @ShiftDividendLoop

        // Handle lowest limb.

        SHL     EAX,CL
        MOV     [RDI],EAX

        MOV     EBX,LSize
        MOV     ECX,RSize

        MOV     RSI,NormDividend
        MOV     RDI,NormDivisor
        LEA     RDI,[RDI + CLimbSize*RCX - CLimbSize]

@MainLoop:

        XOR     EDX,EDX
        MOV     EAX,[RSI + CLimbSize*RBX]
        DIV     EAX,[RDI]
        MOV     QHat.Hi,EAX
        MOV     EAX,[RSI + CLimbSize*RBX - CLimbSize]
        DIV     EAX,[RDI]
        MOV     QHat.Lo,EAX
        MOV     RHat.Lo,EDX
        XOR     EDX,EDX
        MOV     RHat.Hi,EDX

@CheckAdjust:

        CMP     QHat.Hi,0
        JNE     @DoAdjust
        MOV     EAX,QHat.Lo
        MUL     EAX,[RDI - CLimbSize]

        CMP     EDX,RHat.Lo
        JA      @DoAdjust
        JB      @AdjustEnd
        CMP     EAX,[RSI + CLimbSize*RBX - 2*CLimbSize]
        JBE     @AdjustEnd

@DoAdjust:

        SUB     QHat.Lo,1
        SBB     QHat.Hi,0
        MOV     EAX,[RDI]
        ADD     RHat.Lo,EAX
        ADC     RHat.Hi,0
        JZ      @CheckAdjust

@AdjustEnd:

        MOV     SaveRDI,RDI
        MOV     SaveRBX,RBX
        MOV     SaveRCX,RCX

        MOV     ECX,EBX
        SUB     ECX,RSize
        LEA     RDI,[RSI + CLimbSize*RCX]
        MOV     RAX,LQuotient
        MOV     EDX,QHat.Lo
        MOV     [RAX + CLimbSize*RCX],EDX
        XOR     EBX,EBX
        MOV     Overflow,EBX

@SubtractProduct:

        MOV     RAX,NormDivisor
        MOV     EAX,[RAX + CLimbSize*RBX]
        MUL     EAX,QHat.Lo
        MOV     Product.Lo,EAX
        MOV     Product.Hi,EDX
        XOR     EDX,EDX
        MOV     EAX,[RDI + CLimbSize*RBX]
        SUB     EAX,Overflow
        SBB     EDX,0
        SUB     EAX,Product.Lo
        SBB     EDX,0
        MOV     [RDI + CLimbSize*RBX],EAX
        MOV     EAX,Product.Hi
        SUB     EAX,EDX
        MOV     Overflow,EAX
        INC     EBX
        CMP     EBX,RSize
        JL      @SubtractProduct

@SubtractProductEnd:

        MOV     RBX,SaveRBX
        MOV     EDX,[RSI + CLimbSize*RBX]
        SUB     EDX,Overflow
        MOV     [RSI + CLimbSize*RBX],EDX
        JNC     @SkipAddBack

        // Add normalized divisor back, if necessary:

        MOV     RAX,LQuotient
        DEC     DWORD PTR [RAX + ClimbSize*RCX]
        XOR     EBX,EBX
        MOV     Overflow,EBX

@AddBackLoop:

        CMP     EBX,RSize
        JGE     @AddBackLoopEnd
        XOR     EDX,EDX
        MOV     RAX,NormDivisor
        MOV     EAX,[RAX + CLimbSize*RBX]
        ADD     EAX,Overflow
        ADD     [RDI + CLimbSize*RBX],EAX
        ADC     EDX,0
        MOV     Overflow,EDX
        INC     EBX
        JMP     @AddBackLoop

@AddBackLoopEnd:

        MOV     RBX,SaveRBX
        ADD     [RSI + CLimbSize*RBX],EDX

@SkipAddBack:

        MOV     RCX,SaveRCX
        MOV     RBX,SaveRBX
        MOV     RDI,SaveRDI

        // End of main loop; loop if required

        DEC     EBX
        CMP     EBX,ECX
        JGE     @MainLoop

        // NormDividend now contains remainder, scaled by Shift.
        // If Remainder <> nil, then shift NormDividend down into Remainder

        MOV     RAX,LRemainder
        TEST    RAX,RAX
        JE      @ExitTrue
        XOR     EBX,EBX
        MOV     RSI,NormDividend
        MOV     RDI,RAX
        MOV     ECX,Shift
        MOV     EAX,[RSI + CLimbSize*RBX]

@RemainderLoop:

        MOV     EDX,[RSI + CLimbSize*RBX + CLimbSize]
        SHRD    EAX,EDX,CL
        MOV     [RDI + CLimbSize*RBX],EAX
        MOV     EAX,EDX
        INC     EBX
        CMP     EBX,RSize
        JL      @RemainderLoop
        SHR     EDX,CL
        MOV     [RDI + CLimbSize*RBX],EDX
        JMP     @ExitTrue

@ExitFalse:

        MOV     BL,False
        JMP     @Exit

@ExitTrue:

        MOV     BL,True

@Exit:

        // Clear dynamic arrays.

        MOV     RCX,NormDividend
        CALL    System.@FreeMem

        MOV     RCX,NormDivisor
        CALL    System.@FreeMem

        MOV     EAX,EBX

end;
{$IFEND}

// Note: only handles Abs(Self) > 0.
class procedure BigInteger.InternalIncrement(Limbs: PLimb; Size: Integer);
{$IFDEF PUREPASCAL}
var
  N: TLimb;
begin
  N := MaxInt;
  while Size > 0 do
  begin
    N := Limbs^;
    Inc(N);
    Limbs^ := N;
    if N <> 0 then
      Break;
    Inc(Limbs);
    Dec(Size);
  end;
  if N = 0 then
  begin
    Limbs^ := 1;
  end;
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm

        TEST    EDX,EDX
        JE      @Exit

@Loop:

        MOV     ECX,[EAX]
        INC     ECX
        MOV     [EAX],ECX
        TEST    ECX,ECX
        JNE     @Exit
        LEA     EAX,[EAX + CLimbSize]
        DEC     EDX
        JNE     @Loop

@Last:

        TEST    ECX,ECX
        JNE     @Exit
        MOV     TLimb PTR [EAX],1

@Exit:

end;
{$ELSE !WIN32}
asm

        TEST    EDX,EDX
        JE      @Exit

@Loop:

        MOV     EAX,[RCX]
        INC     EAX
        MOV     [RCX],EAX
        TEST    EAX,EAX
        JNE     @Exit
        LEA     RCX,[RCX + CLimbSize]
        DEC     EDX
        JNE     @Loop

@Last:

        TEST    EAX,EAX
        JNE     @Exit
        MOV     TLimb PTR [RCX],1

@Exit:

end;
{$ENDIF !WIN32}
{$ENDIF !PUREPASCAL}

// Note: only handles Abs(Self) > 1
class procedure BigInteger.InternalDecrement(Limbs: PLimb; Size: Integer);
{$IFDEF PUREPASCAL}
begin
  repeat
    Dec(Limbs^);
    if Limbs^ <> TLimb(-1) then
      Break;
    Inc(Limbs);
    Dec(Size);
  until Size = 0;
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm

@Loop:

        MOV     ECX,[EAX]
        DEC     ECX
        MOV     [EAX],ECX
        CMP     ECX,-1
        JNE     @Exit
        LEA     EAX,[EAX + CLimbSize]
        DEC     EDX
        JNE     @Loop

@Exit:

end;
{$ELSE !WIN32}
asm

@Loop:

        MOV     EAX,[RCX]
        DEC     EAX
        MOV     [RCX],EAX
        CMP     EAX,-1
        JNE     @Exit
        LEA     RCX,[RCX + CLimbSize]
        DEC     EDX
        JNE     @Loop

@Exit:

end;
{$ENDIF !WIN32}
{$ENDIF !PUREPASCAL}

// Divides a magnitude (usually the FData of a TBigInteger) by Base and returns the remainder.
class function BigInteger.InternalDivideByBase(Mag: PLimb; Base: Integer; var Size: Integer): UInt32;
{$IF DEFINED(PUREPASCAL)}

// This routine uses DivMod(Cardinal, Word, Word, Word).
// In Win32, that is 14 times faster than the previous version using the DivMod with UInt64 parameters.
// In Win64, it is only a little bit slower.

type
  UInt32Rec = record
    Lo, Hi: UInt16;
  end;
  PUInt16 = ^UInt16;

var
  P, PMag: PUInt16;
  Remainder: UInt16;
  CurrentWord: UInt32;
begin
  Result := 0;
  if Size = 0 then
    Exit;
  PMag := PUInt16(Mag);
  P := PMag + Size * 2;
  Remainder := 0;
  while P > PMag do
  begin
    Dec(P);
    UInt32Rec(CurrentWord).Lo := P^;
    UInt32Rec(CurrentWord).Hi := Remainder;
    Math.DivMod(CurrentWord, Base, P^, Remainder);
  end;
  Result := Remainder;
  if Mag[Size - 1] = 0 then
    Dec(Size);
end;
{$ELSEIF DEFINED(WIN32)}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX
        MOV     EBX,ECX                         // var Size
        MOV     ECX,EDX
        MOV     ESI,EAX                         // PBase (= Mag)
        MOV     EDX,[EBX]
        XOR     EAX,EAX                         // Result
        TEST    EDX,EDX
        JE      @Exit
        LEA     EDI,[ESI + CLimbSize*EDX]       // P
        XOR     EDX,EDX                         // Remainder := 0;
        CMP     EDI,ESI                         // while P > PBase do
        JBE     @CheckSize
@Loop:
        SUB     EDI,4                           // Dec(P);
        MOV     EAX,[EDI]                       // DivMod(P^ or (Remainder shl 32), 10, P^, Remainder);
        DIV     EAX,ECX
        MOV     [EDI],EAX
        CMP     EDI,ESI                         // while P > PBase do
        JA      @Loop
@CheckSize:
        MOV     EAX,EDX                         // if (PBase + Size - 1)^ = 0 then
        MOV     EDX,[EBX]
        LEA     ESI,[ESI + CLimbSize*EDX - CLimbSize]
        CMP     [ESI],0
        JNE     @Exit
        DEC     DWORD PTR [EBX]                 //   Dec(Size);
@Exit:
        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE}
asm
        .NOFRAME

        MOV     R11,R8                          // var Size
        MOV     R9,RCX                          // PBase := Mag;
        MOV     ECX,EDX
        XOR     EAX,EAX                         // Result := 0;
        MOV     EDX,[R11]                       // if Size = 0 then Exit;
        OR      EDX,EDX
        JE      @Exit
        LEA     R10,[R9 + CLimbSize*RDX]        // P
        XOR     EDX,EDX                         // Remainder := 0;
        CMP     R10,R9                          // while P > PBase do
        JBE     @CheckSize
@Loop:
        SUB     R10,4                           // Dec(P)
        MOV     EAX,[R10]                       // DivMod(P^ or (Remainder shl 32), 10, P^, Remainder);
        DIV     EAX,ECX
        MOV     [R10],EAX
        CMP     R10,R9                          // while P > PBase do
        JA      @Loop
@CheckSize:
        MOV     EAX,EDX
        MOV     EDX,[R11]
        CMP     [R9 + CLimbSize*RDX - CLimbSize],0   // if (PBase + Size - 1)^ = 0 then
        JNE     @Exit
        DEC     DWORD PTR [R11]                 //   Dec(Size);
@Exit:
end;
{$IFEND}

class operator BigInteger.Equal(const Left, Right: BigInteger): Boolean;
begin
  Result := Compare(Left, Right) = 0;
end;

class procedure BigInteger.Error(ErrorCode: TErrorCode; const ErrorInfo: string);
begin
  case ErrorCode of
    ecParse:
      raise EConvertError.CreateFmt(SErrorBigIntegerParsing, [ErrorInfo]);
    ecDivbyZero:
      raise EZeroDivide.Create(SDivisionByZero);
    ecConversion:
      raise EConvertError.CreateFmt(SConversionFailed, [ErrorInfo]);
    ecOverflow:
      raise EOverflow.Create(SOverflow);
    ecInvalidArgument:
      raise EInvalidArgument.Create(SInvalidArgumentBase);
  else
    raise EInvalidOp.Create(SInvalidOperation);
  end;
end;

class operator BigInteger.Explicit(const Int: BigInteger): Cardinal;
begin
  if Int.FData = nil then
    Result := 0
  else
    Result := Int.FData[0] and High(Cardinal);
end;

class operator BigInteger.Explicit(const Int: BigInteger): Integer;
begin
  if Int.FData = nil then
    Result := 0
  else
  begin
    Result := Int.FData[0] and High(Integer);
    if Int.FSize < 0 then
      Result := -Result;
  end;
end;

class operator BigInteger.Explicit(const Int: BigInteger): Int64;
begin
  if Int.FData = nil then
    Result := 0
  else
  begin
    TUInt64(Result).Lo := Int.FData[0];
    if (Int.FSize and SizeMask) > 1 then
      TUInt64(Result).Hi := Int.FData[1] and High(Integer)
    else
      TUInt64(Result).Hi := 0;
    if Int.FSize < 0 then
      Result := -Result;
  end;
end;

function BigInteger.AsCardinal: Cardinal;
begin
  Result := 0;
  if not IsNegative and (BitLength <= CCardinalBits) then
    Result := Cardinal(Self)
  else
    Error(ecConversion, 'Cardinal');
end;

function GetBitAt(FData: PLimb; BitNum: Integer): Boolean;
begin
  Result := (FData[BitNum div 32] and (1 shl (BitNum and 31))) <> 0
end;

function BigInteger.AsDouble: Double;
const
  BitMasks: array[0..31] of Cardinal = // $FFFFFFFF shl (31 - Index)
  (
    $00000001, $00000003, $00000007, $0000000F, $0000001F, $0000003F, $0000007F, $000000FF,
    $000001FF, $000003FF, $000007FF, $00000FFF, $00001FFF, $00003FFF, $00007FFF, $0000FFFF,
    $0001FFFF, $0003FFFF, $0007FFFF, $000FFFFF, $001FFFFF, $003FFFFF, $007FFFFF, $00FFFFFF,
    $01FFFFFF, $03FFFFFF, $07FFFFFF, $0FFFFFFF, $1FFFFFFF, $3FFFFFFF, $7FFFFFFF, $FFFFFFFF
  );

  // The four values below will result in exactly the same value as: Result := StrToFloat(Self.ToString);
  ExponentBias = 1023;                  // DO NOT CHANGE!!!
  SignificandBits = 52;                 // DO NOT CHANGE!!!
  GuardOffset = SignificandBits + 2;    // DO NOT CHANGE!!!
  ExponentBits = 11;                    // DO NOT CHANGE!!!

  ExponentShift = SignificandBits - CUInt32Bits;
  ExponentMask = Pred(1 shl ExponentBits);
  SignificandMask = Pred(1 shl ExponentShift);
var
  BitLen: Integer;
  StickyIndex: Integer;
  StickyBits: TLimb;
  Guard, Round: Boolean;
  NumLeadingZeroes, K, I: Integer;
  LSize: Integer;
  Res: packed record
    case Byte of
      0: (Dbl: Double);
      1: (Bits: UInt64);
      2: (Lo, Hi: UInt32);
  end;
begin
  BitLen := BitLength;
  if BitLen > 1025 then
    if FSize < 0 then
      Exit(NegInfinity)
    else
      Exit(Infinity);
//    Error(ecConversion, 'Double');
  if BitLen <= CInt64Bits then
    Result := AsInt64
  else
  begin
    LSize := Size;

    // Form significand from top 53 bits of BigInteger.
    NumLeadingZeroes := (CLimbBits - BitLen) and 31;
    if NumLeadingZeroes > 11 then
    begin

      // 53 bits spread over 3 limbs, e.g.:
      // FData[LSize-1]                   FData[LSize-2]                   FData[LSize-3]
      // 10----5----0----5----0----5----0 10----5----0----5----0----5----0 10----5----0----5----0----5----0
      // 000000000000000000000000000AAAAA BBBBBBBBBBBBBBBbbbbbbbbbbbbbbbbb CCCCCCCCCCCCCCCC****************

      K := NumLeadingZeroes - 11;
      Res.Hi := (FData[LSize - 1] shl K) or (FData[LSize - 2] shr (CLimbBits - K)); { a shl K or b shr (31 - K) }
      Res.Lo := (FData[LSize - 2] shl K) or (FData[LSize - 3] shr (CLimbBits - K)); { b shl K or c shr (31 - K) }

      // Res.Hi                           Res.Lo
      // 10----5----0----5----0----5----0 10----5----0----5----0----5----0
      // 00000000000AAAAABBBBBBBBBBBBBBBB bbbbbbbbbbbbbbbbCCCCCCCCCCCCCCCC

    end
    else
    begin

      // 53 bits spread over 2 limbs, e.g.:
      // FData[LSize-1]                   FData[LSize-2]
      // 10----5----0----5----0----5----0 10----5----0----5----0----5----0
      // 000000AAAAAAAAAAAAAAAAAAAAAaaaaa BBBBBBBBBBBBBBBBBBBBBBBBBBB*****

      Res.Hi := FData[LSize - 1];
      Res.Lo := FData[LSize - 2];
      if NumLeadingZeroes < 11 then
        Res.Bits := Res.Bits shr (11 - NumLeadingZeroes);

      // Res.Hi                           Res.Lo
      // 10----5----0----5----0----5----0 10----5----0----5----0----5----0
      // 00000000000AAAAAAAAAAAAAAAAAAAAA aaaaaBBBBBBBBBBBBBBBBBBBBBBBBBBB

    end;

    // Rounding, using guard, round and sticky bit.
    // Guard is below lowest bit of significand, Round bit one below that, and Sticky bit is accumulation of all bits
    // below the other two.

    // GRSB - Action (slightly modified; B is lowest bit of significand)
    // 0xxx - round down = do nothing (x means any bit value, 0 or 1)
    // 1000 - round down = do nothing
    // 1001 - round up
    // 101x - round up
    // 110x - round up
    // 111x - round up

    // Collect all bits below round bit into sticky bit.
    StickyIndex := BitLen - GuardOffset - 2;

    StickyBits := 0;
    // First collect from limbs below sticky bit.
    for I := 0 to StickyIndex div 32 - 1 do
      StickyBits := StickyBits or FData[I];
    // Then include bits up to the sticky bit.
    StickyBits := StickyBits or (FData[StickyIndex div CLimbBits] and BitMasks[StickyIndex and (ClimbBits - 1)]);

    // Get guard and round bits.
    Round := GetBitAt(PLimb(FData), StickyIndex + 1);
    Guard := GetBitAt(PLimb(FData), StickyIndex + 2);

    // See table above.
    if Guard and (Odd(Res.Lo) or Round or (StickyBits <> 0)) then
      Res.Bits := Res.Bits + 1;

    // Beware of overflowing the significand!
    if Res.Bits > $1FFFFFFFFFFFFF then
    begin
      Res.Bits := Res.Bits shr 1;
      Inc(BitLen);
    end;

    // Remove hidden bit and place exponent and sign bit to form a complete Double.
    Res.Hi := (Res.Hi and SignificandMask) or                           // top of significand, hidden bit removed
              UInt32(((BitLen - 1 + ExponentBias) and ExponentMask) shl ExponentShift) or  // exponent, unbiased
              UInt32(SignBitOf(FSize));                                                    // sign bit

    Result := Res.Dbl;
  end;
end;

function BigInteger.AsInt64: Int64;
begin
  Result := 0;
  if BitLength <= CInt64Bits then
    Result := Int64(Self)
  else
    Error(ecConversion, 'Int64');
end;

function BigInteger.AsInteger: Integer;
begin
  Result := 0;
  if BitLength <= CIntegerBits then
    Result := Integer(Self)
  else
    Error(ecConversion, 'Integer');
end;

function BigInteger.AsUInt64: UInt64;
begin
  Result := 0;
  if not IsNegative and (BitLength <= CUInt64Bits) then
    Result := UInt64(Self)
  else
    Error(ecConversion, 'UInt64');
end;

class operator BigInteger.Explicit(const Int: BigInteger): UInt64;
begin
  if Int.FData = nil then
    Result := 0
  else
  begin
    TUInt64(Result).Lo := Int.FData[0];
    if (Int.FSize and SizeMask) > 1 then
      TUInt64(Result).Hi := Int.FData[1] and High(Cardinal)
    else
      TUInt64(Result).Hi := 0;
  end;
end;

class function BigInteger.InternalCompare(Left, Right: PLimb; LSize, RSize: Integer): TValueSign;
{$IFDEF PUREPASCAL}
var
  L, R: PLimb;
begin
  if Left = nil then
  begin
    if Right = nil then
      Exit(0)
    else
      Exit(-1);
  end;
  if Right = nil then
    Exit(1);
  if LSize > RSize then
    Result := 1
  else if LSize < RSize then
    Result := -1
  else
  // Same size, so compare values. Start at the "top" (most significant limb).
  begin
    L := Left + LSize - 1;
    R := Right + LSize - 1;
    while L >= Left do
    begin
      if L^ > R^  then
        Exit(1)
      else if L^ < R^ then
        Exit(-1);
      Dec(L);
      Dec(R);
    end;
    Exit(0);
  end;
end;
{$ELSE !PUREPASCAL}
{$IFDEF WIN32}
asm
        PUSH    ESI

        TEST    EAX,EAX
        JNE     @LeftNotNil
        TEST    EDX,EDX
        JZ      @ExitZero
        JMP     @ExitNeg

@LeftNotNil:

        TEST    EDX,EDX
        JZ      @ExitPos

        CMP     ECX,RSize
        JA      @ExitPos
        JB      @ExitNeg

        MOV     ESI,EAX

@Loop:

        MOV     EAX,[ESI + ECX*CLimbSize - CLimbSize]
        CMP     EAX,[EDX + ECX*CLimbSize - CLimbSize]
        JA      @ExitPos
        JB      @ExitNeg
        DEC     ECX
        JNE     @Loop

@ExitZero:

        XOR     EAX,EAX
        JMP     @Exit

@ExitPos:

        MOV     EAX,1
        JMP     @Exit

@ExitNeg:

        MOV     EAX,-1

@Exit:

        POP     ESI
end;
{$ELSE WIN64}
asm
        TEST    RCX,RCX
        JNZ     @LeftNotNil

        // Left is nil
        TEST    RDX,RDX
        JZ      @ExitZero                       // if Right nil too, then equal
        JMP     @ExitNeg                        // Otherwise, Left < Right

@LeftNotNil:

        TEST    RDX,RDX
        JZ      @ExitPos

        CMP     R8D,R9D
        JA      @ExitPos
        JB      @ExitNeg

        // R8D and R9D are same.

        LEA     RCX,[RCX + R8*CLimbSize]
        LEA     RDX,[RDX + R8*CLimbSize]

        TEST    R8D,1
        JZ      @NotOdd

        LEA     RCX,[RCX - CLimbSize]
        LEA     RDX,[RDX - CLimbSize]
        MOV     EAX,[RCX]
        CMP     EAX,[RDX]
        JA      @ExitPos
        JB      @ExitNeg
        DEC     R8D

@NotOdd:

        SHR     R8D,1
        JZ      @ExitZero

@Loop:

        LEA     RCX,[RCX - DLimbSize]
        LEA     RDX,[RDX - DLimbSize]
        MOV     RAX,[RCX]
        CMP     RAX,[RDX]
        JA      @ExitPos
        JB      @ExitNeg
        DEC     R8D
        JNE     @Loop

@ExitZero:

        XOR     EAX,EAX
        JMP     @Exit

@ExitPos:

        MOV     EAX,1
        JMP     @Exit

@ExitNeg:

        MOV     EAX,-1

@Exit:

end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

//class function BigInteger.InternalCompareMagnitudes(Left, Right: PLimb; LSize, RSize: Integer): TValueSign;
//begin
//  Result := InternalCompare(Left, Right, LSize, RSize);
//end;
//
{$IFNDEF PUREPASCAL}
class procedure BigInteger.InternalSubtractModified(Larger, Smaller, Result: PLimb; LSize, SSize: Integer);
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX                         // Left
        MOV     EDI,EDX                         // Right
        MOV     EBX,ECX                         // Result

        MOV     ECX,SSize
        MOV     EDX,LSize

        SUB     EDX,ECX
        PUSH    EDX
        XOR     EDX,EDX

        XOR     EAX,EAX

        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail

@MainLoop:

        MOV     EAX,[ESI]
        SBB     EAX,[EDI]
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        SBB     EAX,[EDI + CLimbSize]
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        SBB     EAX,[EDI + 2*CLimbSize]
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        SBB     EAX,[EDI + 3*CLimbSize]
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + 4*CLimbSize]
        LEA     EDI,[EDI + 4*CLimbSize]
        LEA     EBX,[EBX + 4*CLimbSize]

        LEA     ECX,[ECX - 1]
        JECXZ   @MainTail
        JMP     @Mainloop

@MainTail:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EDI,[EDI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@JumpsMain]
        JMP     [ECX + EDX*TYPE Pointer]

        // Align jump table manually, with NOPs. Update if necessary.

        NOP

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     EAX,[ESI - 3*CLimbSize]
        SBB     EAX,[EDI - 3*CLimbSize]
        MOV     [EBX - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[ESI - 2*CLimbSize]
        SBB     EAX,[EDI - 2*CLimbSize]
        MOV     [EBX - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[ESI - CLimbSize]
        SBB     EAX,[EDI - CLimbSize]
        MOV     [EBX - CLimbSize],EAX

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDI,EDI

        POP     ECX
        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        JECXZ   @RestLast3

@RestLoop:

        MOV     EAX,[ESI]
        SBB     EAX,EDI
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + 4*CLimbSize]
        LEA     EBX,[EBX + 4*CLimbSize]

        LEA     ECX,[ECX - 1]
        JECXZ   @RestLast3
        JMP     @RestLoop

@RestLast3:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@RestJumps]
        JMP     [ECX + EDX*TYPE Pointer]

        // If necessary, align second jump table with NOPs

        NOP
        NOP
        NOP

@RestJumps:

        DD      @Exit
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EAX,[ESI - 3*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[ESI - 2*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[ESI - CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX - CLimbSize],EAX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN32/WIN64}
asm
        MOV     R10,RCX
        MOV     ECX,SSize

        // R10 = Left, RDX = Right, R8 = Result, R9D = LSize, ECX = SSize

        CMP     R9D,ECX
        JAE     @SkipSwap
        XCHG    ECX,R9D
        XCHG    R10,RDX

@SkipSwap:

        SUB     R9D,ECX
        PUSH    R9

        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail

@MainLoop:

        MOV     RAX,[R10]
        SBB     RAX,[RDX]
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize]
        SBB     RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        LEA     RCX,[RCX - 1]
        JRCXZ   @MainTail
        JMP     @MainLoop

@MainTail:

// Here, code does not add index*CLimbSize and then use negative offsets, because that would take away the advantage of using 64 bit registers.
// Each block is separate, no fall through.

        LEA     RCX,[@MainJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // Align jump table. Update if necessary!

        DB      $90,$90,$90,$90,$90

@MainJumps:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     RAX,[R10]
        SBB     RAX,[RDX]
        MOV     [R8],RAX

        MOV     EAX,[R10 + 2*CLimbSize]
        SBB     EAX,[RDX + 2*CLimbSize]
        MOV     [R8 + 2*CLimbSize],EAX

        LEA     R10,[R10 + 3*CLimbSize]
        LEA     RDX,[RDX + 3*CLimbSize]
        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @DoRestLoop

@Main2:

        MOV     RAX,[R10]
        SBB     RAX,[RDX]
        MOV     [R8],RAX

        LEA     R10,[R10 + 2*CLimbSize]
        LEA     RDX,[RDX + 2*CLimbSize]
        LEA     R8,[R8 + 2*CLimbSize]

        JMP     @DoRestLoop

@Main1:

        MOV     EAX,[R10]
        SBB     EAX,[RDX]
        MOV     [R8],EAX

        LEA     R10,[R10 + CLimbSize]
        LEA     RDX,[RDX + CLimbSize]
        LEA     R8,[R8 + CLimbSize]

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDX,EDX

        POP     RCX
        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        JECXZ   @RestLast3

@RestLoop:

        MOV     RAX,[R10]
        SBB     RAX,RDX
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize]
        SBB     RAX,RDX
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        LEA     RCX,[RCX - 1]
        JRCXZ   @RestLast3
        JMP     @RestLoop

@RestLast3:

        LEA     RCX,[@RestJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // If necessary, align second jump table with NOPs

        DB      $90,$90,$90,$90,$90,$90,$90

@RestJumps:

        DQ      @Exit
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     RAX,[R10]
        SBB     RAX,RDX
        MOV     [R8],RAX

        MOV     EAX,[R10 + DLimbSize]
        SBB     EAX,EDX
        MOV     [R8 + DLimbSize],EAX

        JMP     @Exit

@Rest2:

        MOV     RAX,[R10]
        SBB     RAX,RDX
        MOV     [R8],RAX

        JMP     @Exit

@Rest1:

        MOV     EAX,[R10]
        SBB     EAX,EDX
        MOV     [R8],EAX

@Exit:

end;
{$ENDIF}

class procedure BigInteger.InternalSubtractPlain(Larger, Smaller, Result: PLimb; LSize, SSize: Integer);
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX                         // Left
        MOV     EDI,EDX                         // Right
        MOV     EBX,ECX                         // Result

        MOV     ECX,SSize
        MOV     EDX,LSize

        SUB     EDX,ECX
        PUSH    EDX
        XOR     EDX,EDX

        XOR     EAX,EAX

        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail

@MainLoop:

        // Unrolled 4 times. More times will not improve speed anymore.

        MOV     EAX,[ESI]
        SBB     EAX,[EDI]
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        SBB     EAX,[EDI + CLimbSize]
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        SBB     EAX,[EDI + 2*CLimbSize]
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        SBB     EAX,[EDI + 3*CLimbSize]
        MOV     [EBX + 3*CLimbSize],EAX

        // Update pointers.

        LEA     ESI,[ESI + 4*CLimbSize]
        LEA     EDI,[EDI + 4*CLimbSize]
        LEA     EBX,[EBX + 4*CLimbSize]

        // Update counter and loop if required.

        DEC     ECX                             // Note: if INC/DEC must be emulated: LEA ECX,[ECX - 1]; JECXZ @MainTail; JMP @MainLoop
        JNE     @MainLoop

@MainTail:

        // Add index*CLimbSize so @MainX branches can fall through.

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EDI,[EDI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        // Indexed jump.

        LEA     ECX,[@JumpsMain]
        JMP     [ECX + EDX*TYPE Pointer]

        // Align jump table manually, with NOPs. Update if necessary.

        NOP

        // Jump table.

@JumpsMain:

        DD      @DoRestLoop
        DD      @Main1
        DD      @Main2
        DD      @Main3

@Main3:

        MOV     EAX,[ESI - 3*CLimbSize]         // negative offset, because index*CLimbSize was already added.
        SBB     EAX,[EDI - 3*CLimbSize]
        MOV     [EBX - 3*CLimbSize],EAX

@Main2:

        MOV     EAX,[ESI - 2*CLimbSize]
        SBB     EAX,[EDI - 2*CLimbSize]
        MOV     [EBX - 2*CLimbSize],EAX

@Main1:

        MOV     EAX,[ESI - CLimbSize]
        SBB     EAX,[EDI - CLimbSize]
        MOV     [EBX - CLimbSize],EAX

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDI,EDI

        POP     ECX
        MOV     EDX,ECX
        AND     EDX,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        INC     ECX
        DEC     ECX
        JE      @RestLast3              // JECXZ is slower than INC/DEC/JE

@RestLoop:

        MOV     EAX,[ESI]
        SBB     EAX,EDI
        MOV     [EBX],EAX

        MOV     EAX,[ESI + CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX + CLimbSize],EAX

        MOV     EAX,[ESI + 2*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX + 2*CLimbSize],EAX

        MOV     EAX,[ESI + 3*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX + 3*CLimbSize],EAX

        LEA     ESI,[ESI + 4*CLimbSize] // LEA does not affect the flags, so carry will not be changed.
        LEA     EBX,[EBX + 4*CLimbSize]

        DEC     ECX                     // DEC does not affect carry flag, but causes partial-flags stall (e.g. when using SBB) on older CPUs.
        JNE     @RestLoop

@RestLast3:

        LEA     ESI,[ESI + EDX*CLimbSize]
        LEA     EBX,[EBX + EDX*CLimbSize]

        LEA     ECX,[@RestJumps]
        JMP     [ECX + EDX*TYPE Pointer]

        // If necessary, align second jump table with NOPs

        NOP
        NOP
        NOP

@RestJumps:

        DD      @Exit
        DD      @Rest1
        DD      @Rest2
        DD      @Rest3

@Rest3:

        MOV     EAX,[ESI - 3*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX - 3*CLimbSize],EAX

@Rest2:

        MOV     EAX,[ESI - 2*CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX - 2*CLimbSize],EAX

@Rest1:

        MOV     EAX,[ESI - CLimbSize]
        SBB     EAX,EDI
        MOV     [EBX - CLimbSize],EAX

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN32/WIN64}
asm
        MOV     R10,RCX         // in emulating code, ECX must be used as loop counter! So do not exchange RCX and R10 in the editor.
        MOV     ECX,SSize

        // R10 = Left, RDX = Right, R8 = Result, R9D = LSize, ECX = SSize

        CMP     R9D,ECX
        JAE     @SkipSwap
        XCHG    ECX,R9D
        XCHG    R10,RDX

@SkipSwap:

        SUB     R9D,ECX
        PUSH    R9

        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        CLC
        JE      @MainTail               // ECX = 0, so fewer than 3 limbs to be processed in main

@MainLoop:

        MOV     RAX,[R10]               // Add two limbs at once, taking advantage of 64 bit registers.
        SBB     RAX,[RDX]
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize] // And next two limbs too.
        SBB     RAX,[RDX + DLimbSize]
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     RDX,[RDX + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        DEC     ECX                     // if INC/DEC must be emulated: LEA ECX,[ECX - 1]; JECXZ @MainTail; JMP @MainLoop
        JNE     @MainLoop

@MainTail:

// Here, code does not add index*CLimbSize and then use negative offsets, because that would take away the advantage of using 64 bit registers.
// Each block is separate, no fall through.

        LEA     RCX,[@MainJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // Align jump table. Update if necessary!

        NOP

@MainJumps:

        DQ      @DoRestLoop
        DQ      @Main1
        DQ      @Main2
        DQ      @Main3

@Main3:

        MOV     RAX,[R10]
        SBB     RAX,[RDX]
        MOV     [R8],RAX

        MOV     EAX,[R10 + DLimbSize]
        SBB     EAX,[RDX + DLimbSize]
        MOV     [R8 + 2*CLimbSize],EAX

        LEA     R10,[R10 + 3*CLimbSize]
        LEA     RDX,[RDX + 3*CLimbSize]
        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @DoRestLoop

@Main2:

        MOV     RAX,[R10]
        SBB     RAX,[RDX]
        MOV     [R8],RAX

        LEA     R10,[R10 + DLimbSize]
        LEA     RDX,[RDX + DLimbSize]
        LEA     R8,[R8 + DLimbSize]

        JMP     @DoRestLoop

@Main1:

        MOV     EAX,[R10]
        SBB     EAX,[RDX]
        MOV     [R8],EAX

        LEA     R10,[R10 + CLimbSize]
        LEA     RDX,[RDX + CLimbSize]
        LEA     R8,[R8 + CLimbSize]

@DoRestLoop:

        SETC    AL                      // Save Carry Flag

        XOR     EDX,EDX

        POP     RCX
        MOV     R9D,ECX
        AND     R9D,CUnrollMask
        SHR     ECX,CUnrollShift

        ADD     AL,255                  // Restore Carry Flag.

        INC     ECX
        DEC     ECX
        JE      @RestLast3              // JECXZ is slower than INC/DEC/JE

@RestLoop:

        MOV     RAX,[R10]               // Do two limbs at once.
        SBB     RAX,RDX
        MOV     [R8],RAX

        MOV     RAX,[R10 + DLimbSize] // And the next two limbs.
        SBB     RAX,RDX
        MOV     [R8 + DLimbSize],RAX

        LEA     R10,[R10 + 2*DLimbSize]
        LEA     R8,[R8 + 2*DLimbSize]

        DEC     ECX
        JNE     @RestLoop

@RestLast3:

        LEA     RCX,[@RestJumps]
        JMP     [RCX + R9*TYPE Pointer]

        // If necessary, align second jump table with NOPs

@RestJumps:

        DQ      @Exit
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     RAX,[R10]
        SBB     RAX,RDX
        MOV     [R8],RAX

        MOV     EAX,[R10 + 2*CLimbSize]
        SBB     EAX,EDX
        MOV     [R8 + 2*CLimbSize],EAX

        LEA     R8,[R8 + 3*CLimbSize]

        JMP     @Exit

@Rest2:

        MOV     RAX,[R10]
        SBB     RAX,RDX
        MOV     [R8],RAX

        LEA     R8,[R8 + 2*CLimbSize]

        JMP     @Exit

@Rest1:

        MOV     EAX,[R10]
        SBB     EAX,EDX
        MOV     [R8],EAX

        LEA     R8,[R8 + CLimbSize]

@Exit:

end;
{$ENDIF !WIN32}
{$ENDIF !PUREPASCAL}

{$IFDEF PUREPASCAL}
class procedure BigInteger.InternalSubtractPurePascal(Larger, Smaller, Result: PLimb; LSize, SSize: Integer);
var
  Diff: TLimb;
  Borrow, InterBorrow: TLimb;
  LTail: Integer;
  LCount: Integer;
{$IFDEF CPUX64}
  Diff64, Borrow64, InterBorrow64, Larger64: UInt64;
{$ENDIF}
begin
{$IFDEF CPUX64}
  Borrow64 := 0;
{$ELSE}
  Borrow := 0;
{$ENDIF}

  Dec(LSize, SSize);
  LTail := SSize and CUnrollMask;
  LCount := SSize shr CUnrollShift;

  // Subtract, with borrow, Smallest from Largest and store result in Result.
  while LCount > 0 do
  begin

    ///////////////////////////////////////////////////////////////////////////////////
    // Tests with 64 bit intermediate results like:                                  //
    //                                                                               //
    //   LResult := Int64(Larger[0]) - Smaller[0] + Integer(TUInt64(LResult).Hi);    //
    //   Result[0] := TLimb(LResult);                                                //
    //                                                                               //
    //   LResult := Int64(Larger[1]) - Smaller[1] + Integer(TUInt64(LResult).Hi);    //
    //   Result[1] := TLimb(LResult);                                                //
    //   // etc...                                                                   //
    //                                                                               //
    // ... turned out to be slower than the following carry emulating code, even     //
    // for 64 bit targets.                                                           //
    ///////////////////////////////////////////////////////////////////////////////////

    ///////////////////////////////////////////////////////////////////////////////////
    // Tests with loop unrolling by a factor > 4 did not improve results at all.     //
    ///////////////////////////////////////////////////////////////////////////////////

  {$IFDEF CPUX64}
    // add UInt64s instead of limbs.
    Larger64 := PUInt64(Larger)[0];
    Diff64 := Larger64 - PUInt64(Smaller)[0];
    InterBorrow64 := Ord(Diff64 > Larger64);
    Diff64 := Diff64 - Borrow64;
    PUInt64(Result)[0] := Diff64;
    Borrow64 := InterBorrow64 or Ord(Diff64 = UInt64(-1)) and Borrow64;

    Larger64 := PUInt64(Larger)[1];
    Diff64 := Larger64 - PUInt64(Smaller)[1];
    InterBorrow64 := Ord(Diff64 > Larger64);
    Diff64 := Diff64 - Borrow64;
    PUInt64(Result)[1] := Diff64;
    Borrow64 := InterBorrow64 or Ord(Diff64 = UInt64(-1)) and Borrow64;
  {$ELSE}
    Diff := Larger[0] - Smaller[0];
    InterBorrow := Ord(Diff > Larger[0]);   // there was a borrow if R0 > Larger[0].
    Diff := Diff - Borrow;
    Result[0] := Diff;
    Borrow := InterBorrow or Ord(Diff = $FFFFFFFF) and Borrow; // there was a borrow if R > R0.

    Diff := Larger[1] - Smaller[1];
    InterBorrow := Ord(Diff > Larger[1]);
    Dec(Diff, Borrow);
    Result[1] := Diff;
    Borrow := InterBorrow or Ord(Diff = $FFFFFFFF) and Borrow;

    Diff := Larger[2] - Smaller[2];
    InterBorrow := Ord(Diff > Larger[2]);
    Dec(Diff, Borrow);
    Result[2] := Diff;
    Borrow := InterBorrow or Ord(Diff = $FFFFFFFF) and Borrow;

    Diff := Larger[3] - Smaller[3];
    InterBorrow := Ord(Diff > Larger[3]);
    Dec(Diff, Borrow);
    Result[3] := Diff;
    Borrow := InterBorrow or Ord(Diff = $FFFFFFFF) and Borrow;
  {$ENDIF}

    Inc(Larger, CUnrollIncrement);
    Inc(Smaller, CUnrollIncrement);
    Inc(Result, CUnrollIncrement);
    Dec(LCount);
  end;

{$IFDEF CPUX64}
  Borrow := TLimb(Borrow64);
{$ENDIF}

  while LTail > 0 do
  begin
    Diff := Larger[0] - Smaller[0];
    InterBorrow := Ord(Diff > Larger[0]);
    Dec(Diff, Borrow);
    Result[0] := Diff;
    Borrow := InterBorrow or Ord(Diff = $FFFFFFFF) and Borrow;

    Inc(Larger);
    Inc(Smaller);
    Inc(Result);
    Dec(LTail);
  end;

  LTail := LSize and CUnrollMask;
  LCount := LSize shr CUnrollShift;

{$IFDEF CPUX64}
  Borrow64 := Borrow;
{$ENDIF}

  // Subtract, with borrow, 0 from Largest and store result in Result.
  while LCount > 0 do
  begin
  {$IFDEF CPUX64}
    Diff64 := PUInt64(Larger)[0] - Borrow64;
    PUInt64(Result)[0] := Diff64;
    Borrow64 := Ord(Diff64 = UInt64(-1)) and Borrow64;

    Diff64 := PUInt64(Larger)[1] - Borrow64;
    PUInt64(Result)[1] := Diff64;
    Borrow64 := Ord(Diff64 = UInt64(-1)) and Borrow64;
  {$ELSE}
    Diff := Larger[0] - Borrow;
    Result[0] := Diff;
    Borrow := Ord(Diff = $FFFFFFFF) and Borrow;

    Diff := Larger[1] - Borrow;
    Result[1] := Diff;
    Borrow := Ord(Diff = $FFFFFFFF) and Borrow;

    Diff := Larger[2] - Borrow;
    Result[2] := Diff;
    Borrow := Ord(Diff = $FFFFFFFF) and Borrow;

    Diff := Larger[3] - Borrow;
    Result[3] := Diff;
    Borrow := Ord(Diff = $FFFFFFFF) and Borrow;
  {$ENDIF}

    Inc(Larger, CUnrollIncrement);
    Inc(Result, CUnrollIncrement);
    Dec(LCount);
  end;

{$IFDEF CPUX64}
  Borrow := TLimb(Borrow64);
{$ENDIF}

  while LTail > 0 do
  begin
    Diff := Larger[0] - Borrow;
    Result[0] := Diff;
    Borrow := Ord(Diff = $FFFFFFFF) and Borrow;

    Inc(Larger);
    Inc(Result);
    Dec(LTail);
  end;
end;
{$ENDIF}

function BigInteger.IsZero: Boolean;
begin
  Result := FData = nil;
end;

class operator BigInteger.LeftShift(const Value: BigInteger; Shift: Integer): BigInteger;
var
  LimbShift: Integer;
  LSign: TLimb;
begin
  if Value.FData = nil then
    Exit(Zero);
  LSign := SignBitOf(Value.FSize);
  LimbShift := Shift div CLimbBits;
  Shift := Shift mod CLimbBits;
  Result.MakeSize((Value.FSize and SizeMask) + LimbShift + 1);
  if Shift > 0 then
    InternalShiftLeft(PLimb(Value.FData), PLimb(Result.FData) + LimbShift, Shift, (Value.FSize and SizeMask))
  else
    CopyLimbs(PLimb(Value.FData), PLimb(Result.FData) + LimbShift, (Value.FSize and SizeMask));
//    Move(Value.FData[0], Result.FData[LimbShift], (Value.FSize and SizeMask) * CLimbSize);
  Result.FSize := (Result.FSize and SizeMask) or Integer(LSign);
  Result.Compact;

  // The following can probably be omitted.
//  if LimbShift > 0 then
//    FillChar(Result.FData[0], CLimbSize * LimbShift, 0);
end;

class operator BigInteger.LessThan(const Left, Right: BigInteger): Boolean;
begin
  Result := Compare(Left, Right) < 0;
end;

class operator BigInteger.LessThanOrEqual(const Left, Right: BigInteger): Boolean;
begin
  Result := Compare(left, Right) <= 0;
end;

function BigInteger.BitLength: Integer;
begin
  if Self.FData = nil then
    Result := 0
  else
  begin
    Result := CLimbBits * (Size - 1) + Velthuis.Numerics.BitLength(FData[Size - 1]);

    // IsPowerOfTwo is expensive, but probably less expensive than a copy and
    // subsequent decrement, like in BitCount.
    if (FSize < 0) and (Self.IsPowerOfTwo) then
      Dec(Result);
  end;
end;

function BigInteger.BitCount: Integer;
var
  Mag: TMagnitude;
  I: Integer;
begin
  if FData = nil then
    Exit(0);

  if FSize > 0 then
    Mag := FData
  else
  begin
    Mag := Copy(FData);
    InternalDecrement(PLimb(Mag), FSize and SizeMask);
  end;

  Result := 0;
  for I := 0 to Size - 1 do
    Result := Result + Velthuis.Numerics.BitCount(Mag[I]);
end;

// http://stackoverflow.com/a/7982137/95954
class function BigInteger.Ln(const Int: BigInteger): Double;
var
  BLex: Integer;
  NewInt: BigInteger;
begin
  if Int.IsNegative then
    Exit(Math.NaN)
  else if Int.IsZero then
    Exit(Math.NegInfinity);
  BLex := Int.BitLength - 1022;
  if BLex > 0 then
    NewInt := Int shr BLex
  else
    NewInt := Int;
  Result := System.Ln(NewInt.AsDouble);
  if BLex > 0 then
    Result := Result + BLex * System.Ln(2.0);
end;

class function BigInteger.Log(const Int: BigInteger; Base: Double): Double;
begin
  Result := BigInteger.Ln(Int) / System.Ln(Base);
end;

class function BigInteger.Log10(const Int: BigInteger): Double;
begin
  Result := Log(Int, 10.0);
end;

class function BigInteger.Log2(const Int: BigInteger): Double;
begin
  Result := Log(Int, 2.0);
end;

class operator BigInteger.LogicalNot(const Int: BigInteger): BigInteger;
begin
  Result := Int;
  Inc(Result);
  if Result.FSize <> 0 then
    Result.FSize := Result.FSize xor SignMask;
end;

class function BigInteger.Max(const Left, Right: BigInteger): BigInteger;
begin
  if Left > Right then
    ShallowCopy(Left, Result)
  else
    ShallowCopy(Right, Result);
end;

class function BigInteger.Min(const Left, Right: BigInteger): BigInteger;
begin
  if Left < Right then
    ShallowCopy(Left, Result)
  else
    ShallowCopy(Right, Result);
end;

// http://stackoverflow.com/questions/8496182/calculating-powa-b-mod-n
class function BigInteger.ModPow(const ABase, AExponent, AModulus: BigInteger): BigInteger;
var
  Base: BigInteger;
  Exp: BigInteger;
begin
  Exp := AExponent;
  Base := ABase mod AModulus;
  Result := BigInteger.One;
  while Exp > Zero do
  begin
    if not Exp.IsEven then
      Result := (Result * Base) mod AModulus;
    Base := (Base * Base) mod AModulus;
    Exp := Exp shr 1;
  end;
end;

class operator BigInteger.Modulus(const Left, Right: BigInteger): BigInteger;
begin
  Result := Remainder(Left, Right);
end;

class operator BigInteger.Modulus(const Left: BigInteger; Right: UInt32): BigInteger;
begin
  Result := Remainder(Left, Right);
end;

class operator BigInteger.Modulus(const Left: BigInteger; Right: UInt16): BigInteger;
begin
  Result := Remainder(Left, Right);
end;

class procedure BigInteger.InternalMultiplyAndAdd(const Multiplicand: TMagnitude; Multiplicator, Addend: Word; const Res: TMagnitude);
{$IF DEFINED(PUREPASCAL)}
type
  WordRec = packed record
    Lo, Hi: Word;
  end;
var
  I: Cardinal;
  LProduct: Cardinal;
  LHighWord: Word;
  LLength: Cardinal;
begin
  LLength := Cardinal(Length(Multiplicand)) * 2;
  LHighWord := 0;
  I := 0;
  while I < LLength-1 do
  begin
    LProduct := PWord(Multiplicand)[I] * Multiplicator + LHighWord;
    PWord(Res)[I] := WordRec(LProduct).Lo;
    LHighWord := WordRec(LProduct).Hi;
    Inc(I);
  end;
  PWord(Res)[I] := LHighWord;
  I := 0;
  LHighword := 0;
  while LLength > 0 do
  begin
    Res[I] := Res[I] + Addend + LHighword;
    LHighWord := Word(Res[I] < Addend + LHighword);
    Addend := 0;
    Dec(LLength);
    Inc(I);
  end;
end;
{$ELSEIF DEFINED(WIN32)}
var
  LLength: Integer;
  LExtra: Word;
  LMultiplicator: Word;
  LProduct: Cardinal;
asm
       PUSH    EBX
       PUSH    ESI
       PUSH    EDI

       MOV     LExtra,CX
       MOV     LMultiplicator,DX

       MOV     ESI,EAX
       MOV     EDI,Res

       TEST    EAX,EAX
       JZ      @NotNil
       MOV     EAX,[EAX - TYPE NativeInt]

@NotNil:

       MOV     LLength,EAX
       XOR     ECX,ECX                          // ECX used for overflow.
       XOR     EBX,EBX                          // EBX = I
       CMP     EBX,LLength
       JNB     @SkipMult

@MultLoop:

       MOV     EAX,[ESI + CLimbSize*EBX]        // EAX,EDX required for multiplication.
       MOVZX   EDX,LMultiplicator
       MUL     EDX
       ADD     EAX,ECX                          // Add in overflow of previous multiplication.
       ADC     EDX,0
       MOV     [EDI + CLimbSize*EBX],EAX
       MOV     ECX,EDX                          // Overflow.
       LEA     EBX,[EBX + 1]
       CMP     EBX,LLength
       JB      @MultLoop

@SkipMult:

       MOV     [EDI + CLimbSize*EBX],EDX

       MOV     ECX,LLength
       XOR     EBX,EBX
       MOVZX   EAX,LExtra

@AddLoop:

       ADC     [EDI + CLimbSize*EBX],EAX
       JNC     @Exit
       MOV     EAX,0
       LEA     EBX,[EBX + 1]
       LEA     ECX,[ECX - 1]
       JECXZ   @Exit
       JMP     @AddLoop

@Exit:

       POP     EDI
       POP     ESI
       POP     EBX
end;
{$ELSE WIN64}
asm
      .PUSHNV RBX

       PUSH    R8                       // PUSH Extra
       MOVZX   R8D,DX                   // R8W = Multiplicator
       MOV     R10,RCX
       TEST    R10,R10
       JZ      @@1
       MOV     R10,[R10-8]              // R10D = Length(Multiplicand)
@@1:
       XOR     R11D,R11D                // R11D = I
       XOR     EBX,EBX
       CMP     R11D,R10D
       JNB     @@3
@@2:
       MOV     EAX,[RCX + CLimbSize*R11]
       MUL     EAX,R8D
       ADD     EAX,EBX
       ADC     EDX,0
       MOV     [R9 + CLimbSize*R11],EAX
       MOV     EBX,EDX
       INC     R11D
       CMP     R11D,R10D
       JB      @@2
@@3:
       MOV     [R9 + CLimbSize*R11],EDX
       POP     RDX                      // POP Extra
       MOVZX   EDX,DX
       XOR     EBX,EBX
@@4:
       ADC     [R9 + CLimbSize*RBX],EDX
       MOV     EDX,0                    //
       INC     EBX                      // These 3 instructions should not modify the carry flag!
       DEC     R10D                     //
       JNE     @@4
end;
{$IFEND}

class operator BigInteger.Multiply(const Left: BigInteger; Right: Word): BigInteger;
begin
  if (Right = 0) or ((Left.FSize and SizeMask) = 0) then
    Exit(Zero);
  Result.MakeSize((Left.FSize and SizeMask) + 2);
  InternalMultiplyAndAdd(Left.FData, Right, 0, Result.FData);
  Result.FSize := (Left.FSize and SignMask) or (Result.FSize and SizeMask);
  Result.Compact;
end;

class operator BigInteger.Multiply(Left: Word; const Right: BigInteger): BigInteger;
begin
  Result := Right * Left;
end;

class function BigInteger.MultiplyKaratsuba(const Left, Right: BigInteger): BigInteger;
var
  k, LSign: Integer;
  z0, z1, z2: BigInteger;
  x, y: TArray<BigInteger>;
begin
  if ((Left.FSize and SizeMask) < KaratsubaThreshold) or ((Right.FSize and SizeMask) < KaratsubaThreshold) then
    Exit(MultiplyBaseCase(Left, Right));

  //////////////////////////////////////////////////////////////////////////////////////////////////
  ///  This is a so called divide and conquer algorithm, solving a big task by dividing it up    ///
  ///  into easier (and hopefully faster, in total) smaller tasks.                               ///
  ///                                                                                            ///
  ///  Let's treat a BigInteger as a polynomial, i.e. x = x1 * B + x0, where B is chosen thus,   ///
  ///  that the top and the low part of the BigInteger are almost the same in size.              ///
  ///  The result R of the multiplication of two such polynomials can be seen as:                ///
  ///                                                                                            ///
  ///  R = (x1 * B + x0) * (y1 * B + y0) = x1 * y1 * B^2 + (x1 * y0 + x0 * y1) * B + x0 * y0     ///
  ///                                                                                            ///
  ///  say, z0 = x0 * y0                                                                         ///
  ///       z1 = x1 * y0 + x0 * y1                                                               ///
  ///       z2 = x1 * y1                                                                         ///
  ///                                                                                            ///
  ///  then                                                                                      ///
  ///  R = z2 * B^2 + z1 * B + z0                                                                ///
  ///                                                                                            ///
  ///  Karatsuba noted that:                                                                     ///
  ///                                                                                            ///
  ///  (x1 + x0) * (y1 + y0) = z2 + z1 + z0, so z1 = (x1 + x0) * (y1 + y0) - (z2 + z0)           ///
  ///                                                                                            ///
  ///  That reduced four multiplications and a few additions to three multiplications, a few     ///
  ///  additions and a subtraction. Surely the parts will be multilimb, but this is called       ///
  ///  recursively down until the sizes are under a threshold, and then simple base case         ///
  ///  (a.k.a. "schoolbook") multiplication is performed.                                        ///
  //////////////////////////////////////////////////////////////////////////////////////////////////

  //////////////////////////////////////////////////////////////////////////////////////////////////
  ///  Note: it may look tempting to use pointers into the original operands, to use one large   ///
  ///  buffer for all results, and to use InternalMultiply directly, but remember that           ///
  ///  InternalMultiply performs a basecase multiplication and it does NOT resurse into a        ///
  ///  deeper level of MultiplyKaratsuba, so after one level, the advantage gained by reducing   ///
  ///  the number of multiplications would be minimal.                                           ///
  ///                                                                                            ///
  ///  There is an overhead caused by using complete BigIntegers, but it is not as high as it    ///
  ///  may look.                                                                                 ///
  //////////////////////////////////////////////////////////////////////////////////////////////////

  LSign := (Left.FSize xor Right.FSize) and SignMask;

  k := (IntMax(Left.FSize and SizeMask, Right.FSize and SizeMask) + 1) shr 1;

  x := Left.Split(k, 2);
  y := Right.Split(k, 2);

  // Recursion further reduces the number of multiplications!
  z2 := MultiplyKaratsuba(x[1], y[1]);
  z0 := MultiplyKaratsuba(x[0], y[0]);
  z1 := MultiplyKaratsuba(x[1] - x[0], y[0] - y[1]) + (z2 + z0);

  Result := z0;
  Result.AddWithOffset(z2, k * 2);
  Result.AddWithOffset(z1, k);

  Result.FSize := (Result.FSize and SizeMask) or LSign;
end;

// Used by Karatsuba, Toom-Cook and Burnikel-Ziegler algorithms.
// Splits Self into BlockCount pieces of (at most) BlockSize limbs, starting with the least significant part.
function BigInteger.Split(BlockSize, BlockCount: Integer): TArray<BigInteger>;
var
  I: Integer;
begin
  SetLength(Result, BlockCount);
  for I := 0 to BlockCount - 1 do
  begin
    if (Self.FSize and BigInteger.SizeMask) > I * BlockSize then
    begin
      Result[I].MakeSize(IntMin(BlockSize, (Self.FSize and SizeMask) - I * BlockSize));
      CopyLimbs(PLimb(Self.FData) + I * BlockSize, PLimb(Result[I].FData), IntMin(BlockSize, (Self.FSize and SizeMask) - I * BlockSize));
      Result[I].Compact;
    end
    else
      ShallowCopy(Zero, Result[I]);
  end;
end;

{$IFNDEF PUREPASCAL}
class procedure BigInteger.InternalDivideBy3(Value, Result: PLimb; ASize: Integer);
const
  MultConst = $AAAAAAAB;
  MultConst2 = $55555556;
{$IFDEF WIN32}
asm
        PUSH    ESI
        PUSH    EDI
        PUSH    EBX

        MOV     ESI,EAX
        MOV     EDI,EDX
        XOR     EBX,EBX

@Loop:

        MOV     EAX,[ESI]
        SUB     EAX,EBX
        SETC    BL

        MOV     EDX,MultConst
        MUL     EAX,EDX
        MOV     [EDI],EAX

        CMP     EAX,MultConst2
        JB      @SkipInc
        INC     EBX
        CMP     EAX,MultConst
        JB      @SkipInc
        INC     EBX

@SkipInc:

        LEA     ESI,[ESI + CLimbSize]
        LEA     EDI,[EDI + CLimbSize]
        DEC     ECX
        JNE     @Loop

@Exit:

        POP     EBX
        POP     EDI
        POP     ESI
end;
{$ELSE WIN64}
asm
        XOR     R9D,R9D
        MOV     R10,RDX

@Loop:

        MOV     EAX,[RCX]
        SUB     EAX,R9D
        SETC    R9B

        MOV     EDX,MultConst
        MUL     EAX,EDX
        MOV     [R10],EAX

        CMP     EAX,MultConst2
        JB      @SkipInc
        INC     R9D
        CMP     EAX,MultConst
        JB      @SkipInc
        INC     R9D

@SkipInc:

        LEA     RCX,[RCX + CLimbSize]
        LEA     R10,[R10 + CLimbSize]
        DEC     R8D
        JNE     @Loop
end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

// Only works if it is known that there is no remainder and A is positive.
class function BigInteger.DivideBy3Exactly(const A: BigInteger): BigInteger;
const
  ModInverse3 = $AAAAAAAB; // Modular inverse of 3 modulo $100000000.
  ModInverse3t2 = $55555556; // 2 * ModInverse3
{$IFDEF PUREPASCAL}
var
  i: Integer;
  ai, w, qi, borrow: Int64;
begin
  if A.FData = nil then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;

  Result.MakeSize(A.FSize and SizeMask);
  borrow := 0;
  for i := 0 to (A.FSize and SizeMask) - 1 do
  begin
    ai := A.FData[i];
    w := ai - borrow;
    if borrow > ai then
      borrow := 1
    else
      borrow := 0;

    qi := (w * ModInverse3) and $FFFFFFFF;
    Result.FData[i] := UInt32(qi);

    if qi >= ModInverse3t2 then
    begin
      Inc(borrow);
      if qi >= ModInverse3 then
        Inc(borrow);
    end;
  end;

  Result.Compact;
end;
{$ELSE !PUREPASCAL}
begin
  if A.FData = nil then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;

  Result.MakeSize(A.FSize and SizeMask);
  InternalDivideBy3(PLimb(A.FData), PLimb(Result.FData), A.FSize and SizeMask);
  Result.Compact;
end;
{$ENDIF !PUREPASCAL}

class function BigInteger.MultiplyToomCook3(const Left, Right: BigInteger): BigInteger;
var
  k: Integer;
  a, b: TArray<BigInteger>;
  a02, b02: BigInteger;
  v0, v1, vm1, v2, vinf: BigInteger;
  t1, t2: BigInteger;
  Sign: Integer;
begin
  // Step 1: if n < threshold then return KaratsubaMultiply(A, B)
  if ((Left.FSize and SizeMask) < ToomCook3Threshold) and ((Right.FSize and SizeMask) < ToomCook3Threshold) then
    Exit(MultiplyKaratsuba(Left, Right));

  Sign := (Left.FSize xor Right.FSize) and SignMask;

  // Richard P. Brent and Paul Zimmermann,
  // "Modern Computer Arithmetic", version 0.5.1 of April 28, 2010
  // http://arxiv.org/pdf/1004.4710v1.pdf

  //////////////////////////////////////////////////////////////////////////////
  // Hint to myself: If necessary, take a look at Bodrato's improved version. //
  // But be aware that most of his *code* is GPL-ed and now part of GMP.      //
  //////////////////////////////////////////////////////////////////////////////

  // Step 2: write A = a0 +a1*x + a2*x^2, B = b0 + b1*x + b2*x^2, with x = ß^k.
  k := (IntMax(Left.FSize and SizeMask, Right.FSize and SizeMask) + 2) div 3;

  a := Left.Split(k, 3);
  b := Right.Split(k, 3);

  // Evaluation at x = -1, 0, 1, 2 and +inf.

  // Step 3: v0 <- ToomCook3(a0, b0)
  v0 := MultiplyToomCook3(a[0], b[0]);

  // Step 4a: a02 <- a0 + a2, b02 <- b0 + b2
  a02 := a[0] + a[2];
  b02 := b[0] + b[2];

  // Step 5: v(-1) <- ToomCook3(a02 - a1, b02 - b1) = ToomCook3(a0 + a2 - a1, b0 + b2 - b1)
  vm1 := MultiplyToomCook3(a02 - a[1], b02 - b[1]);

  // Intermediate step: a'02 = a02 + a1, b'02 = b02 + b1
  a02 := a02 + a[1];
  b02 := b02 + b[1];

  // Step 4b: v1 <- ToomCook3(a02 + a1, b02 + b1) = ToomCook3(a'02, b'02)
  v1 := MultiplyToomCook3(a02, b02);

  // Step 6: v2 <- ToomCook3(a0 + 2*a1 + 4*a2, b0 + 2*b1 + 4*b2)
  // Note: first operand is a0 + a1 + a1 + a2 + a2 + a2 + a2 = 2*(a0 + a1 + a2 + a2) - a0 = 2*(a'02 + a2) - a0
  v2 := MultiplyToomCook3((a02 + a[2]) shl 1 - a[0], (b02 + b[2]) shl 1 - b[0]);

  // Step 7: v(inf) <- ToomCook3(a2, b2)
  vinf := MultiplyToomCook3(a[2], b[2]);

  // Step 8: t1 <- (3*v0 + 2*v(−1) + v2)/6 − 2 * v(inf), t2 <- (v1 + v(−1))/2
  t1 := DivideBy3Exactly(((v0 + vm1) shl 1 + v2 + v0) shr 1) - (vinf shl 1);
  t2 := (v1 + vm1) shr 1;

  // Step 9: c0 <- v0, c1 <- v1 - t1, c2 <- t2 - v0 - vinf, c3 <- t1 - t2, c4 <- vinf
  ShallowCopy(v0, Result);
  Result.AddWithOffset(vinf, 4*k);
  Result.AddWithOffset(t1 - t2, 3*k);
  Result.AddWithOffset(t2 - v0 - vinf, 2*k);
  Result.AddWithOffset(v1 - t1, k);

  Result.FSize := (Result.FSize and SizeMask) or Sign;

end;

{$IFDEF Experimental}
class function BigInteger.MultiplyToomCook3Threshold(const Left, Right: BigInteger; Threshold: Integer): BigInteger;
var
  k: Integer;
  a, b: TArray<BigInteger>;
  a02, b02: BigInteger;
  v0, v1, vm1, v2, vinf: BigInteger;
  t1, t2: BigInteger;
  c0, c1, c2, c3, c4: BigInteger;
  Sign: Integer;
begin
  Sign := (Left.FSize xor Right.FSize) and SignMask;

  // Richard P. Brent and Paul Zimmermann,
  // "Modern Computer Arithmetic", version 0.5.1 of April 28, 2010
  // http://arxiv.org/pdf/1004.4710v1.pdf

  // Hint to myself: If necessary, take a look at Bodrato's improved version.
  // But be aware that most of his *code* is GPL-ed and now part of GMP.

  // Step 1: if n < threshold then return KaratsubaMultiply(A, B)
  if ((Left.FSize and SizeMask) < Threshold) or ((Right.FSize and SizeMask) < Threshold) then
    Exit(MultiplyKaratsuba(Left, Right));

  // Step 2: write A = a0 +a1*x + a2*x^2, B = b0 + b1*x + b2*x^2, with x = ß^k.
  k := (IntMax(Left.FSize and SizeMask, Right.FSize and SizeMask) + 2) div 3;

  a := Left.Split(k, 3);
  b := Right.Split(k, 3);

  // Step 3: v0 <- ToomCook3(a0, b0)
  v0 := MultiplyToomCook3Threshold(a[0], b[0], Threshold);

  // Step 4a: a02 <- a0 + a2, b02 <- b0 + b2
  a02 := a[0] + a[2];
  b02 := b[0] + b[2];

  // Step 4b: v1 <- ToomCook3(a02 + a1, b02 + b1)
  v1 := MultiplyToomCook3Threshold(a02 + a[1], b02 + b[1], Threshold);

  // Step 5: v(-1) <- ToomCook3(a02 - a1, b02 - b1)
  vm1 := MultiplyToomCook3Threshold(a02 - a[1], b02 - b[1], Threshold);

  // Step 6: v2 <- ToomCook3(a0 + 2*a1 + 4*a2, b0 + 2*b1 + 4*b2)
  v2 := MultiplyToomCook3Threshold(a[0] + a[1] shl 1 + a[2] shl 2, b[0] + b[1] shl 1 + b[2] shl 2, Threshold);

  // Step 7: v(inf) <- ToomCook3(a2, b2)
  vinf := MultiplyToomCook3Threshold(a[2], b[2], Threshold);

  // Step 8: t1 <- (3*v0 + 2*v(-1) + v2) / 6 - 2*v(inf), t2 <- (v1 + v(-1)) / 2
  t1 := ((v0 shl 1 + v0 + vm1 shl 1 + v2) shr 1) div BigInteger(3) - vinf shl 1;  // $$RV make exactdiv3 or divbyword with constant
  t2 := (v1 + vm1) shr 1;

  // Step 9:
  c0 := v0;
  c1 := v1 - t1;
  c2 := t2 - v0 - vinf;
  c3 := t1 - t2;
  c4 := vinf;

  // Output: AB = c0 + c1*ß^k + c2*ß^2*k + c3*ß^3*k + c4*ß^4*k
  Result := c0;
  if c4.FData <> nil then
    Result.AddWithOffset(c4, 4 * k);
  if c1.FData <> nil then
    Result.AddWithOffset(c1, k);
  if c2.FData <> nil then
    Result.AddWithOffset(c2, 2 * k);
  if c3.FData <> nil then
    Result.AddWithOffset(c3, 3 * k);

  Result.FSize := (Result.FSize and SizeMask) or Sign;
end;

class function BigInteger.MultiplyKaratsubaThreshold(const Left, Right: BigInteger; Threshold: Integer): BigInteger;
var
  NDiv2Shift, NDiv2: Integer;
  LeftUpper, RightUpper: BigInteger;
  LeftLower, RightLower: BigInteger;
  Upper, Middle, Lower: BigInteger;
  LSize, LSign: Integer;
begin
  if (Left.Size < Threshold) or (Right.Size < Threshold) then
    Exit(MultiplyBaseCase(Left, Right));

  LSign := (Left.FSize xor Right.FSize) and SignMask;
  LSize := IntMax((Left.FSize and SizeMask), (Right.FSize and SizeMask));
  NDiv2Shift := (LSize and $FFFFFFFE) shl 4; // := LSize div 2 * SizeOf(TLimb);
  NDiv2 := LSize div 2;

  // Split Left
  if (Left.FSize and SizeMask) > NDiv2 then
  begin
    LeftLower.MakeSize(NDiv2);
    CopyLimbs(PLimb(Left.FData), PLimb(LeftLower.FData), NDiv2);
    LeftUpper.MakeSize((Left.FSize and SizeMask) - NDiv2);
    CopyLimbs(PLimb(Left.FData) + NDiv2, PLimb(LeftUpper.FData), (Left.FSize and SizeMask) - NDiv2);
    LeftLower.Compact;
  end
  else
  begin
    ShallowCopy(Zero, LeftUpper);
    ShallowCopy(Left, LeftLower);
  end;

  // Split Right
  if (Right.FSize and SizeMask) > NDiv2 then
  begin
    RightLower.MakeSize(NDiv2);
    CopyLimbs(PLimb(Right.FData), PLimb(RightLower.FData), NDiv2);
    RightUpper.MakeSize((Right.FSize and SizeMask) - NDiv2);
    CopyLimbs(PLimb(Right.FData) + NDiv2, PLimb(RightUpper.FData), (Right.FSize and SizeMask) - NDiv2);
    RightLower.Compact;
  end
  else
  begin
    ShallowCopy(Zero, RightUpper);
    ShallowCopy(Right, RightLower)
  end;

  Upper := MultiplyKaratsubaThreshold(LeftUpper, RightUpper, Threshold);
  Lower := MultiplyKaratsubaThreshold(LeftLower, RightLower, Threshold);
  Middle := MultiplyKaratsubaThreshold(LeftUpper - LeftLower, RightLower - RightUpper, Threshold) + (Lower + Upper);

  // Can't just move these values into place, because they still overlap when shifted.
  Result := Upper shl (NDiv2Shift + NDiv2Shift) + Middle shl NDiv2Shift + Lower;
  Result.FSize := (Result.FSize and SizeMask) or LSign;
end;
{$ENDIF Experimental}

class function BigInteger.SqrKaratsuba(const Value: BigInteger): BigInteger;
var
  NDiv2Shift, NDiv2: Integer;
  ValueUpper: BigInteger;
  ValueLower: BigInteger;
  Upper, Middle, Lower: BigInteger;
  LSize: Integer;
begin
  LSize := (Value.FSize and SizeMask);
  NDiv2Shift := (LSize and $FFFFFFFE) shl 4; // := LSize div 2 * SizeOf(TLimb);
  NDiv2 := LSize div 2;

  ValueLower.MakeSize(NDiv2);
  CopyLimbs(PLimb(Value.FData), PLimb(ValueLower.FData), NDiv2);
  ValueUpper.MakeSize((Value.FSize and SizeMask) - NDiv2);
  CopyLimbs(PLimb(Value.FData) + NDiv2, PLimb(ValueUpper.FData), (Value.FSize and SizeMask) - NDiv2);
  ValueLower.Compact;

  Upper := Sqr(ValueUpper);
  Lower := Sqr(ValueLower);
  Middle := (ValueUpper * ValueLower) shl 1;

  // Can't simply move these values into place, because they still overlap when shifted.
  Result := Upper shl (NDiv2Shift + NDiv2Shift) + Middle shl NDiv2Shift + Lower;
  Result.FSize := Result.FSize and SizeMask;
end;

class function BigInteger.SqrKaratsubaThreshold(const Value: BigInteger; Threshold: Integer): BigInteger;
var
  k: Integer;
//  x0, x1: BigInteger;
  x: TArray<BigInteger>;
  z2, z1, z0: BigInteger;
  LSize: Integer;
begin
  LSize := (Value.FSize and SizeMask);
  if LSize < Threshold then
  begin
    Exit(MultiplyKaratsuba(Value, Value));
  end;

  k := LSize div 2;

  x := Value.Split(k, 2);

  z2 := SqrKaratsubaThreshold(x[1], Threshold);
  z0 := SqrKaratsubaThreshold(x[0], Threshold);
  z1 := (x[1] * x[0]) shl 1;

  Result := z0;
  if z2.FData <> nil then
    Result.AddWithOffset(z2, 2*k);
  if z1.FData <> nil then
    Result.AddWithOffset(z1, k);

  Result.FSize := Result.FSize and SizeMask;
end;

class function BigInteger.Multiply(const Left, Right: BigInteger): BigInteger;
var
  LResult: BigInteger; // Avoid prematurely overwriting result when it is same as one of the operands.
begin
  if (Left.FData = nil) or (Right.FData = nil) then
  begin
    ShallowCopy(BigInteger.Zero, Result);
    Exit;
  end;

  if ((Left.FSize and SizeMask) < KaratsubaThreshold) or ((Right.FSize and SizeMask) < KaratsubaThreshold) then
  begin
    // The following block is "Result := MultiplyBaseCase(Left, Right);" written out in full.
    LResult.MakeSize((Left.FSize and SizeMask) + (Right.FSize and SizeMask) + 1);
    InternalMultiply(PLimb(Left.FData), PLimb(Right.FData), PLimb(LResult.FData), (Left.FSize and SizeMask), (Right.FSize and SizeMask));
    LResult.Compact;
    LResult.FSize := (LResult.FSize and SizeMask) or ((Left.FSize xor Right.FSize) and SignMask);
    ShallowCopy(LResult, Result);
  end
  else
  begin
    if ((Left.FSize and SizeMask) < ToomCook3Threshold) and ((Right.FSize and SizeMask) < ToomCook3Threshold) then
      Result := MultiplyKaratsuba(Left, Right)
    else
      Result := MultiplyToomCook3(Left, Right);
  end;
end;

class function BigInteger.MultiplyThreshold(const Left, Right: BigInteger; Threshold: Integer): BigInteger;
var
  LResult: BigInteger; // Avoid prematurely overwriting result when it is same as one of the operands.
begin
  if (Left.FData = nil) or (Right.FData = nil) then
  begin
    ShallowCopy(BigInteger.Zero, Result);
    Exit;
  end;

  if ((Left.FSize and SizeMask) < Threshold) or ((Right.FSize and SizeMask) < Threshold) then
  begin
    LResult.MakeSize((Left.FSize and SizeMask) + (Right.FSize and SizeMask) + 1);
    InternalMultiply(PLimb(Left.FData), PLimb(Right.FData), PLimb(LResult.FData), (Left.FSize and SizeMask), (Right.FSize and SizeMask));
    LResult.Compact;
    LResult.SetSign(SignBitOf(Left.FSize) xor SignBitOf(Right.FSize));
    ShallowCopy(LResult, Result);
  end
  else
    Result := MultiplyKaratsubaThreshold(Left, Right, Threshold);
end;

class function BigInteger.MultiplyBaseCase(const Left, Right: BigInteger): BigInteger;
var
  LResult: BigInteger; // Avoid prematurely overwriting result when it is same as one of the operands.
begin
  if (Left.FData = nil) or (Right.FData = nil) then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;

  LResult.MakeSize((Left.FSize and SizeMask) + (Right.FSize and SizeMask) + 1);
  InternalMultiply(PLimb(Left.FData), PLimb(Right.FData), PLimb(LResult.FData), (Left.FSize and SizeMask), (Right.FSize and SizeMask));
  LResult.Compact;
  LResult.SetSign(SignBitOf(Left.FSize) xor SignBitOf(Right.FSize));
  ShallowCopy(LResult, Result);
end;

class operator BigInteger.Multiply(const Left, Right: BigInteger): BigInteger;
begin
  Result := Multiply(Left, Right);
end;

class procedure BigInteger.SetBase(const Value: TNumberBase);
begin
  FBase := Value;
end;

procedure BigInteger.SetSign(Value: Integer);
begin
  FSize := (FSize and SizeMask) or (Ord(Value < 0) * SignMask);
end;

function BigInteger.Subtract(const Other: BigInteger): PBigInteger;
var
  MinusOther: BigInteger;
begin
  ShallowCopy(Other, MinusOther);
  MinusOther.FSize := MinusOther.FSize xor SignMask;
  Result := Add(MinusOther);
end;

class function BigInteger.Subtract(const Left, Right: BigInteger): BigInteger;
const
  BoolMasks: array[Boolean] of Integer = (SignMask, 0);
var
  Largest, Smallest: PBigInteger;
  Res: BigInteger;
  Comparison: TValueSign;
begin
  if Left.FData = nil then
  begin
    ShallowCopy(Right, Result);
    if Result.FSize <> 0 then
      Result.FSize := Result.FSize xor SignMask;
    Exit;
  end;
  if Right.FData = nil then
  begin
    ShallowCopy(Left, Result);
    Exit;
  end;

  Comparison := InternalCompare(PLimb(Left.FData), PLimb(Right.FData), (Left.FSize and SizeMask), (Right.FSize and SizeMask));
  if (Comparison = 0) and (Left.Sign = Right.Sign) then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;

  if Comparison > 0 then
  begin
    Largest := @Left;
    Smallest := @Right;
  end
  else
  begin
    Largest := @Right;
    Smallest := @Left;
  end;

  Res.MakeSize((Largest^.FSize and SizeMask) + 1);
  if Largest^.Sign = Smallest^.Sign then
    FInternalSubtract(PLimb(Largest^.FData), PLimb(Smallest^.FData), PLimb(Res.FData), (Largest^.FSize and SizeMask), (Smallest^.FSize and SizeMask))
  else
    FInternalAdd(PLimb(Largest^.FData), PLimb(Smallest^.FData), PLimb(Res.FData), (Largest^.FSize and SizeMask), (Smallest^.FSize and SizeMask));
  Res.FSize := (Res.FSize and SizeMask) or BoolMasks[(Largest^.FSize < 0) xor (Largest = @Left)];
  Res.Compact;
  Result := Res;
end;

class operator BigInteger.Subtract(const Left, Right: BigInteger): BigInteger;
begin
  Result := Subtract(Left, Right);
end;

procedure BigInteger.EnsureSize(RequiredSize: Integer);
begin
  RequiredSize := RequiredSize and SizeMask;
  if RequiredSize > Length(FData) then
    SetLength(FData, (RequiredSize + 4) and CapacityMask);
  FSize := (FSize and SignMask) or RequiredSize;
end;

procedure BigInteger.MakeSize(RequiredSize: Integer);
begin
  SetLength(FData, (RequiredSize + 4) and CapacityMask);
  FillChar(FData[0], Length(FData) * CLimbSize, 0);
  FSize := (FSize and SignMask) or RequiredSize;
end;

// In Win32, we keep what we have. In Win64, we switch, depending on Size. At 25 limbs or above, the unrolled loop version is faster.
class procedure BigInteger.InternalNegate(Source, Dest: PLimb; Size: Integer);
{$IFDEF PUREPASCAL}
var
  R: TLimb;
begin
  R := 0;
  while R = 0 do
  begin
    R := (not Source^) + 1;
    Dest^ := R;
    Inc(Source);
    Inc(Dest);
    Dec(Size);
    if Size = 0 then
      Exit;
  end;
  while Size > 0 do
  begin
    Dest^ := not Source^;
    Inc(Source);
    Inc(Dest);
    Dec(Size);
  end;
end;
{$ELSE}
{$IFDEF WIN32}

// This is faster than an unrolled loop with NOT and ADC, especially for smaller BigIntegers.

asm
        PUSH    ESI

@Loop:

        MOV     ESI,[EAX]
        NOT     ESI
        INC     ESI
        MOV     [EDX],ESI
        LEA     EAX,[EAX + CLimbSize]
        LEA     EDX,[EDX + CLimbSize]
        DEC     ECX
        JE      @Exit
        TEST    ESI,ESI                 // Only if ESI is 0, a carry occurred.
        JE      @Loop

@RestLoop:                              // No more carry. We can stop incrementing.

        MOV     ESI,[EAX]
        NOT     ESI
        MOV     [EDX],ESI
        LEA     EAX,[EAX + CLimbSize]
        LEA     EDX,[EDX + CLimbSize]
        DEC     ECX
        JNE     @RestLoop

@Exit:

        POP     ESI
end;
{$ELSE WIN64}
asm

        CMP     R8D,25
        JA      @Unrolled

// Plain version. Faster for small BigIntegers (<= 25 limbs).

@Loop:

        MOV     EAX,[RCX]
        NOT     EAX
        INC     EAX
        MOV     [RDX],EAX
        LEA     RCX,[RCX + CLimbSize]
        LEA     RDX,[RDX + CLimbSize]
        DEC     R8D
        JE      @Exit
        TEST    EAX,EAX
        JE      @Loop

@RestLoop:

        MOV     EAX,[RCX]
        NOT     EAX
        MOV     [RDX],EAX
        LEA     RCX,[RCX + CLimbSize]
        LEA     RDX,[RDX + CLimbSize]
        DEC     R8D
        JNE     @RestLoop
        JMP     @Exit

// Unrolled version. Faster for larger BigIntegers.

@Unrolled:

        TEST    RCX,RCX
        JE      @Exit
        XCHG    R8,RCX
        MOV     R9,RDX
        XOR     EDX,EDX
        MOV     R10D,ECX
        AND     R10D,CUnrollMask
        SHR     ECX,CUnrollShift
        STC
        JE      @Rest

@LoopU:

        MOV     RAX,[R8]
        NOT     RAX
        ADC     RAX,RDX
        MOV     [R9],RAX

        MOV     RAX,[R8 + DLimbSize]
        NOT     RAX
        ADC     RAX,RDX
        MOV     [R9 + DLimbSize],RAX

        LEA     R8,[R8 + 2*DLimbSize]
        LEA     R9,[R9 + 2*DLimbSize]
        LEA     ECX,[ECX - 1]
        JECXZ   @Rest
        JMP     @LoopU

@Rest:

        LEA     RAX,[@JumpTable]
        JMP     [RAX + R10*TYPE Pointer]

        // Align jump table with NOPs

        NOP

@JumpTable:

        DQ      @Exit
        DQ      @Rest1
        DQ      @Rest2
        DQ      @Rest3

@Rest3:

        MOV     RAX,[R8]
        NOT     RAX
        ADC     RAX,RDX
        MOV     [R9],RAX

        MOV     EAX,[R8 + DLimbSize]
        NOT     EAX
        ADC     EAX,EDX
        MOV     [R9 + DLimbSize],EAX

        JMP     @Exit

@Rest2:

        MOV     RAX,[R8]
        NOT     RAX
        ADC     RAX,RDX
        MOV     [R9],RAX

        JMP     @Exit

@Rest1:

        MOV     EAX,[R8]
        NOT     EAX
        ADC     EAX,EDX
        MOV     [R9],EAX

@Exit:
end;
{$ENDIF WIN64}
{$ENDIF !PUREPASCAL}

// Needed for Karatsuba, ToomCook and Burnikel-Ziegler
// Assumes non-negative parameters and non-negative self.
procedure BigInteger.AddWithOffset(const Addend: BigInteger; Offset: Integer);
begin
  Self.EnsureSize(IntMax(Offset + (Addend.FSize and SizeMask), Self.FSize and SizeMask));
  if Offset >= (Self.FSize and SizeMask) then
    CopyLimbs(PLimb(Addend.FData), PLimb(Self.FData) + Offset, Addend.FSize and SizeMask)
  else
    FInternalAdd(PLimb(Self.FData) + Offset, PLimb(Addend.FData), PLimb(Self.FData) + Offset, (Self.FSize and SizeMask) - Offset, Addend.FSize and SizeMask);
  Self.Compact;
end;

class procedure BigInteger.InternalBitwise(const Left, Right: BigInteger; var Result: BigInteger; PlainOp, OppositeOp, InversionOp: TDyadicOperator);

//---------------------------------------------------------------------------------------------------------------//
//                                                                                                               //
//  The code for the bitwise operators AND, OR and XOR does not differ much.                                     //
//  Since the results should be the results for two's complement, two's complement semantics are emulated.       //
//  Originally, this meant that the magnitudes of negative bigintegers were negated, then the                    //
//  operation was performed and if the result had to be negative, the magnitude of the result was negated.       //
//  These negation steps were slow, so now this code uses some logical shortcuts.                                //
//                                                                                                               //
//  The rules used are like follows.                                                                             //
//                                                                                                               //
//  In the following, A and B represent positive integer values, so -A and -B represent negative values.         //
//  Note that, to keep this simple, 0 -- i.e. FData = nil -- is not handled at all. That is handled              //
//  by the caller and then this routine is not called.                                                           //
//                                                                                                               //
//  Relation between negation and inversion of an integer/magnitude:                                             //
//  -A = not A + 1    => not A = -A - 1                                                                          //
//  -A = not (A - 1)                                                                                             //
//                                                                                                               //
//  Note: A and B are magnitudes here. Negating a BigInteger is as simple as flipping the sign bit. That         //
//  does not work for magnitudes.                                                                                //
//                                                                                                               //
//  Boolean (and bitwise) rules followed:                                                                        //
//  not not A       = A                                                                                          //
//  not (A and B)   = not A or not B                                                                             //
//  not (A or B)    = not A and not B                                                                            //
//  not (A xor B)   = not A xor B = A xor not B                                                                  //
//  not A xor not B = A xor B                                                                                    //
//                                                                                                               //
//  Expressions used here:                                                                                       //
//                                                                                                               //
//  A and B      = A and B                               ; both positive, plain operation                        //
//  A and -B     = A and not (B - 1)                     ; one positive, one negative, result positive           //
//  -(-A and -B) = -(not (A - 1) and not (B - 1))        ; both negative, result is negative too                 //
//               = - not ((A - 1) or (B - 1)))                                                                   //
//               = (A - 1) or (B - 1) + 1                                                                        //
//                                                                                                               //
//  A or B       = A or B                                ; both positive                                         //
//  -(A or -B)   = -(A or not (B - 1))                   ; one positive, one negative, result is negative too    //
//               = - not (not A and (B - 1))                                                                     //
//               = ((B - 1) and not A) + 1                                                                       //
//  -(-A or -B)  = -(not (A - 1) or not (B - 1))         ; both negative, result is negative too                 //
//               = not (not (A - 1) or not (B - 1) + 1                                                           //
//               = (A - 1) and (B - 1) + 1                                                                       //
//                                                                                                               //
//  A xor B      = A xor B                               ; both positive                                         //
//  -(A xor -B)  = -(A xor not (B - 1))                  ; one positive, one negative, result is negative too    //
//               = not (A xor not (B - 1)) + 1                                                                   //
//               = A xor (B - 1) + 1                                                                             //
//  -A xor -B    = not (A - 1) xor not (B - 1)           ; both negative, result is positive                     //
//               = (A - 1) xor (B - 1)                                                                           //
//                                                                                                               //
//  So the only "primitives" required are routines for AND, OR, XOR and AND NOT. The latter is not really        //
//  a primitive, but it is so easy to implement, that it can be considered one. NOT is cheap, does not require   //
//  complicated carry handling.                                                                                  //
//  Routines like Inc and Dec are cheap too: you only loop as long as there is a carry (or borrow). Often, that  //
//  is only over very few limbs.                                                                                 //
//                                                                                                               //
//  Primitives (InternalAnd(), etc.) routines were optimized too. Loops were unrolled, 64 bit registers used     //
//  where possible, both sizes are passed, so the operations can be done on the original data. The latter        //
//  reduces the need for copying into internal buffers.                                                          //
//                                                                                                               //
//  These optimizations made bitwise operators 2-3 times as fast as with the simple implementations before.      //
//                                                                                                               //
//---------------------------------------------------------------------------------------------------------------//

var
  LSize, RSize, MinSize, MaxSize: Integer;
  LPtr, RPtr: PLimb;
begin
  LSize := Left.FSize and SizeMask;
  RSize := Right.FSize and SizeMask;
  MinSize := IntMin(LSize, RSize);
  MaxSize := IntMax(LSize, RSize);

  if ((Left.FSize xor Right.FSize) and SignMask) = 0 then
  begin
    if (Left.FSize > 0) then
    begin
      if @PlainOp = @InternalAnd then
        Result.MakeSize(MinSize)
      else
        Result.MakeSize(MaxSize);
      PlainOp(PLimb(Left.FData), PLimb(Right.FData), PLimb(Result.FData), LSize, RSize);
    end
    else
    begin
      LPtr := AllocLimbs(LSize + RSize);                        // LPtr := Copy(Left);
      RPtr := LPtr + LSize;                                     // RPtr := Coyp(Right);
      CopyLimbs(PLimb(Left.FData), LPtr, LSize);
      CopyLimbs(PLimb(Right.FData), RPtr, RSize);
      InternalDecrement(LPtr, LSize);                           // LPtr^ := LPtr^ - 1
      InternalDecrement(RPtr, RSize);                           // RPtr^ := RPtr^ - 1
      Result.FSize := 0;
      Result.MakeSize(MaxSize);
      OppositeOp(LPtr, RPtr, PLimb(Result.FData), LSize, RSize);        // Opposite op: AND --> OR, OR --> AND, XOR --> XOR
      if @PlainOp = @InternalXor then
        Result.FSize := Result.FSize and SizeMask               // Make positive.
      else
      begin
        InternalIncrement(PLimb(Result.FData), MaxSize);                // Result := Result + 1
        Result.FSize := Result.FSize or SignMask;               // Make negative.
      end;
      FreeMem(LPtr);
    end;
  end
  else
  begin
    if (Left.FSize > 0) then
    begin
      RPtr := AllocLimbs(RSize);
      CopyLimbs(PLimb(Right.FData), RPtr, RSize);
      InternalDecrement(RPtr, RSize);
      Result.FSize := 0;
      if @PlainOp = @InternalOr then
        Result.MakeSize(RSize)
      else
        Result.MakeSize(MaxSize);
      InversionOp(PLimb(Left.FData), RPtr, PLimb(Result.FData), LSize, RSize);  // Inversion op: AND --> AND NOT, OR --> NOT AND, XOR --> XOR
      if @PlainOp = @InternalAnd then
        Result.FSize := Result.FSize and SizeMask               // Make positive.
      else
      begin
         InternalIncrement(PLimb(Result.FData), (Result.FSize and SizeMask));
         Result.FSize := Result.FSize or SignMask;              // Make negative.
      end;
      FreeMem(RPtr);
    end
    else
    begin
      LPtr := AllocLimbs(LSize);
      CopyLimbs(PLimb(Left.FData), LPtr, LSize);
      InternalDecrement(LPtr, LSize);
      Result.FSize := 0;
      if @PlainOp = @InternalOr then
        Result.MakeSize(LSize)
      else
        Result.MakeSize(MaxSize);
      InversionOp(PLimb(Right.FData), LPtr, PLimb(Result.FData), RSize, LSize);
      if @PlainOp = @InternalAnd then
        Result.FSize := Result.FSize and SizeMask
      else
      begin
         InternalIncrement(PLimb(Result.FData), (Result.FSize and SizeMask));
         Result.FSize := Result.FSize or SignMask;
      end;
      FreeMem(LPtr);
    end;
  end;
  Result.Compact;
end;

class operator BigInteger.Negative(const Int: BigInteger): BigInteger;
begin
  // Magnitude is not modified, so a shallow copy is enough!
  ShallowCopy(Int, Result);
  if Result.FSize <> 0 then
    Result.FSize := Result.FSize xor SignMask;
end;

class function BigInteger.Parse(const S: string; aBase : Integer): BigInteger;
var
  TryResult: BigInteger;
begin
  if TryParse(S, TryResult, aBase) then
    Result := TryResult
  else
    Error(ecParse, S);
end;

class function BigInteger.Pow(const ABase: BigInteger; AExponent: Integer): BigInteger;
var
  Base: BigInteger;
begin
  Base := ABase;
  Result := One;
  while AExponent > 0 do
  begin
    if Odd(AExponent) then
      Result := Result * Base;
    Base := Sqr(Base);
    AExponent := AExponent shr 1;
  end;
end;

class operator BigInteger.NotEqual(const Left, Right: BigInteger): Boolean;
begin
  Result := Compare(Left, Right) <> 0;
end;

class procedure BigInteger.Octal;
begin
  FBase := 8;
end;

class function BigInteger.Remainder(const Left: BigInteger; Right: UInt16): BigInteger;
var
  LQuotient: TMagnitude;
begin
  if Right = 0 then
    Error(ecDivByZero);
  Result.MakeSize(1);
  SetLength(LQuotient, (Left.FSize and SizeMask));
  if not InternalDivMod32(PLimb(Left.FData), Right, PLimb(LQuotient), PLimb(Result.FData), (Left.FSize and SizeMask)) then
    Error(ecInvalidArg);
  Result.Compact;
  if Result.FSize <> 0 then
    Result.FSize := (Result.FSize and SizeMask) or SignBitOf(Left.FSize);
end;

class function BigInteger.Remainder(const Left: BigInteger; Right: UInt32): BigInteger;
var
  LQuotient: TMagnitude;
begin
  if Right = 0 then
    Error(ecDivByZero);
  Result.MakeSize(1);
  SetLength(LQuotient, (Left.FSize and SizeMask));
  if not InternalDivMod32(PLimb(Left.FData), Right, PLimb(LQuotient), PLimb(Result.FData), (Left.FSize and SizeMask)) then
    Error(ecInvalidArg);
  Result.Compact;
  if Result.FSize <> 0 then
    Result.FSize := (Result.FSize and SizeMask) or SignBitOf(Left.FSize);
end;

class function BigInteger.Remainder(const Left, Right: BigInteger): BigInteger;
var
  Quotient: BigInteger;
  LSize, RSize: Integer;
begin
  if Right.FData = nil then
    Error(ecDivByZero);

  LSize := Left.FSize and SizeMask;
  RSize := Right.FSize and SizeMask;

  case InternalCompare(PLimb(Left.FData), PLimb(Right.FData), LSize, RSize) of
    -1:
      begin
        ShallowCopy(Left, Result);
        Exit;
      end;
    0:
      begin
        ShallowCopy(Zero, Result);
        Exit;
      end;
    else
      begin
        if ShouldUseBurnikelZiegler(LSize, RSize) then
          DivModBurnikelZiegler(Left, Right, Quotient, Result)
        else
          DivModKnuth(Left, Right, Quotient, Result);

        // In Delphi, sign of remainder is sign of dividend.
        if Result.FSize <> 0 then
          Result.FSize := (Result.FSize and SizeMask) or SignBitOf(Left.FSize);
      end;
  end;
end;

function BigInteger.Remainder(const Other: BigInteger): PBigInteger;
begin
  Result := @Self;
  Self := Self mod Other;
end;

class operator BigInteger.RightShift(const Value: BigInteger; Shift: Integer): BigInteger;
// Note: this emulates two's complement, more or less like the bitwise operators.
var
  LSize: Integer;
  ShiftOffset: Integer;
  RSize: Integer;
  P: PLimb;
begin
  if Value.FData = nil then
  begin
    ShallowCopy(Zero, Result);
    Exit;
  end;

  if Value.FSize > 0 then
  begin
    LSize := (Value.FSize and SizeMask);
    ShiftOffset := Shift shr 5;
    RSize := LSize - ShiftOffset;
    if RSize <= 0 then
    begin
      ShallowCopy(Zero, Result);
      Exit;
    end;
    Shift := Shift and $1F;
    Result.MakeSize(RSize);
    if Shift > 0 then
      InternalShiftRight(PLimb(Value.FData) + ShiftOffset, PLimb(Result.FData), Shift, RSize)
    else
      CopyLimbs(PLimb(Value.FData) + ShiftOffset, PLimb(Result.FData), RSize);
    Result.Compact;
  end
  else
  begin

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //  This branch had to be written out completely. Using any local BigInteger, even a hidden BigInteger (to do     //
    //  something like "Inc(Result);" a hidden BigInteger is allocated) will slow this down enormously.               //
    //  The mere presence of a BigInteger causes InitializeArray and FinalizeArray to be compiled in,                 //
    //  and a hidden try-finally to be placed around the routine.                                                     //
    //  Removing all local BigIntegers sped up this branch by a factor of 3 and the entire routine by a factor of 2.  //
    //                                                                                                                //
    //  Original code:                                                                                                //
    //  Result := MinusOne - ((MinusOne - Value) shr Shift);                                                          //
    //                                                                                                                //
    //  See: http://blogs.teamb.com/rudyvelthuis/2015/10/04/27826                                                     //
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    LSize := (Value.FSize and SizeMask);
    P := AllocLimbs(LSize);
    // try
    CopyLimbs(PLimb(Value.FData), P, LSize);
    InternalDecrement(P, LSize);
    while (LSize > 0) and (P[LSize - 1] = 0) do
      Dec(LSize);
    if LSize = 0 then
    begin
      ShallowCopy(MinusOne, Result);
      Exit;
    end;
    ShiftOffset := Shift shr 5;
    if ShiftOffset >= LSize then
    begin
      ShallowCopy(MinusOne, Result);
      Exit;
    end;
    Shift := Shift and $1F;
    Result.FSize := 0;
    Result.MakeSize(LSize - ShiftOffset);
    if Shift = 0 then
      CopyLimbs(P + ShiftOffset, PLimb(Result.FData), LSize - ShiftOffset)
    else
      BigInteger.InternalShiftRight(P + ShiftOffset, PLimb(Result.FData), Shift, LSize - ShiftOffset);
    // finally
    FreeMem(P);
    // end;

    Result.Compact;
    if Result.FData = nil then
    begin
      ShallowCopy(MinusOne, Result);
      Exit;
    end;
    InternalIncrement(PLimb(Result.FData), (Result.FSize and SizeMask));
    Result.FSize := (Result.FSize and SizeMask) or SignMask;
  end;
end;

class operator BigInteger.Implicit(const Value: string): BigInteger;
begin
  if not TryParse(Value, Result, FBase) then
    Error(ecParse, Value);
end;

class operator BigInteger.Explicit(const Int: BigInteger): Double;
begin
  Result := Int.AsDouble;
end;

class operator BigInteger.Explicit(const ADouble: Double): BigInteger;
begin
  Result.Create(ADouble);
end;

class operator BigInteger.Inc(const Int: BigInteger): BigInteger;
begin
  if Int.FData = nil then
  begin
    ShallowCopy(One, Result);
    Exit;
  end;
  Result.FData := Copy(Int.FData);
  Result.FSize := Int.FSize;
  if Result.FSize > 0 then
  begin
    Result.EnsureSize((Result.FSize and SizeMask) + 1);
    InternalIncrement(PLimb(Result.FData), (Result.FSize and SizeMask));
  end
  else
    InternalDecrement(PLimb(Result.FData), (Result.FSize and SizeMask));
  Result.Compact;
end;

class operator BigInteger.Dec(const Int: BigInteger): BigInteger;
begin
  if Int.FData = nil then
  begin
    ShallowCopy(MinusOne, Result);
    Exit;
  end;
  Result.FData := Copy(Int.FData);
  Result.FSize := Int.FSize;
  if Result.FSize < 0 then
  begin
    Result.EnsureSize((Result.FSize and SizeMask) + 1);
    InternalIncrement(PLimb(Result.FData), (Result.FSize and SizeMask));
  end
  else
    InternalDecrement(PLimb(Result.FData), (Result.FSize and SizeMask));
  Result.Compact;
end;

procedure BigInteger.FromString(const Value: string; aBase : Integer);
begin
  if not TryParse(Value, Self, aBase) then
  begin
    Self.FData := nil;
    Self.FSize := 0;
  end;
end;

function BigInteger.Add(const Other: BigInteger): PBigInteger;
var
  SelfSize, OtherSize: Integer;
  Comparison: TValueSign;
begin
  Result := @Self;
  if Other.IsZero then
    Exit;
  if Self.IsZero then
  begin
    Self := Other;
    Exit;
  end;
  FData := Copy(FData);
  SelfSize := FSize and SizeMask;
  OtherSize := Other.FSize and SizeMask;
  if Self.IsNegative = Other.IsNegative then
  begin
    EnsureSize(IntMax(SelfSize, OtherSize) + 1);
    FInternalAdd(PLimb(Self.FData), PLimb(Other.FData), PLimb(Self.FData), SelfSize, OtherSize);
  end
  else
  begin
    // Different signs, so subtract.
    EnsureSize(IntMax(SelfSize, OtherSize));
    Comparison := InternalCompare(PLimb(Self.FData), PLimb(Other.FData), (Self.FSize and SizeMask), (Other.FSize and SizeMask));
    if Comparison = 0 then
    begin
      Self := Zero;
      Exit;
    end;

    if Comparison > 0 then
    begin
      FInternalSubtract(PLimb(Self.FData), PLimb(Other.FData), PLimb(Self.FData), SelfSize, OtherSize);
    end
    else
    begin
      FInternalSubtract(PLimb(Other.FData), PLimb(Self.FData), PLimb(Self.FData), OtherSize, SelfSize);
      Self.FSize := Self.FSize xor SignMask;
    end;
  end;
  Compact;
end;

class procedure BigInteger.AvoidPartialFlagsStall(Value: Boolean);
{$IFDEF PUREPASCAL}
begin
  FInternalAdd := InternalAddPurePascal;
  FInternalSubtract := InternalSubtractPurePascal;
end;
{$ELSE}
begin
  FAvoidStall := Value;
  if Value then
  begin
    FInternalAdd := InternalAddModified;
    FInternalSubtract := InternalSubtractModified;
  end
  else
  begin
    FInternalAdd := InternalAddPlain;
    FInternalSubtract := InternalSubtractPlain;
  end;
end;
{$ENDIF}

function BigInteger.Multiply(const Other: BigInteger): PBigInteger;
begin
  Result := @Self;
  Self := Self * Other;
end;

procedure FlipBigIntegerBit(var B: BigInteger; Index: Integer); inline;
begin
  B.FData := Copy(B.FData);
  B.EnsureSize(IntMax(Index shr 5 + 1, B.FSize and BigInteger.SizeMask));
  B.FData[Index shr 5] := B.FData[Index shr 5] xor (1 shl (Index and $1F));
  B.Compact;
end;

function BigInteger.TestBit(Index: Integer): Boolean;

///////////////////////////////////////////////////////////////////////
//  Two's complement semantics are required.                         //
//                                                                   //
//  Note: -A = not (A - 1) = not A + 1                               //
//                                                                   //
//  Example, assuming 16 bit limbs, negating goes like follows:      //
//                                                                   //
//    -$1234 5678 9ABC 0000 0000 -> $EDCB A987 6544 0000 0000        //
//  0:                      0000 ->                      FFFF + 1    //
//  1:                 0000      ->                 FFFF + 1         //
//  2:            9ABC           ->            6543 + 1              //
//  3:       5678                ->       A987                       //
//  4:  1234                     ->  EDCB                            //
//                                                                   //
//  So accessing limb 4 or 3:    Data := not Data                    //
//     accessing limb 2, 1 or 0: Data := not Data + 1                //
///////////////////////////////////////////////////////////////////////

var
  I: Integer;
  Mask: TLimb;
  Data: TLimb;
begin
  if FData = nil then

    // Zero, so no bit set. Return False.
    Result := False
  else if Index >= BitLength then

    // Beyond bit length, so return sign
    Result := (FSize and SignMask) <> 0
  else
  begin
    Mask := 1 shl (Index and $1F);
    Index := Index shr 5;
    Data := FData[Index];

    // Emulate negation if this BigInteger is negative.
    // Not necessary if BigInteger is positive.
    if (FSize and SignMask) <> 0 then
    begin

      // -A = not A + 1.
      Data := not Data; // Wait with the + 1, see below.
      I := 0;

      // See if carry propagates from lowest limb to limb containing the bit. If so, increment Data.
      while (I <= Index) and (FData[I] = 0) do
        Inc(I);
      if Index <= I then
        Inc(Data);
    end;

    // Get the bit.
    Result := (Data and Mask) <> 0;
  end;
end;

function BigInteger.SetBit(Index: Integer): BigInteger;
begin
  Result := Self;
  if not TestBit(Index) then
    FlipBigIntegerBit(Result, Index);
end;

function BigInteger.ClearBit(Index: Integer): BigInteger;
begin
  Result := Self;
  if TestBit(Index) then
    FlipBigIntegerBit(Result, Index);
end;

function BigInteger.FlipBit(Index: Integer): BigInteger;
begin
  Result := Self;
  FlipBigIntegerBit(Result, Index);
end;

class function BigInteger.NthRoot(const Radicand: BigInteger; Nth: Integer): BigInteger;

// http://stackoverflow.com/a/32541958/95954

var
  Estimate, EstimateToNthPower, NewEstimateToNthPower, TwoToNthPower: BigInteger;
  AdditionalBit: Integer;
begin
  if Radicand = BigInteger.One then
    Exit(BigInteger.One);
  if Nth = 0 then
    Exit(BigInteger.Zero);                      // Error: there is no zeroth root.
  if Nth = 1 then
    Exit(Radicand);

  TwoToNthPower := BigInteger.Pow(2, Nth);

  // First estimate. Very likely closer to final value than the original BigInteger.One.
  Estimate := BigInteger.One shl (Radicand.BitLength div Nth);
  // EstimateToNthPower is Estimate ^ Nth
  EstimateToNthPower := BigInteger.Pow(Estimate, Nth);

  // Shift Estimate right until Estimate ^ Nth >= Value.
  while EstimateToNthPower < Radicand do
  begin
    Estimate := Estimate shl 1;
    EstimateToNthPower := TwoToNthPower * EstimateToNthPower;
  end;

  // EstimateToNthPower is the lowest power of two such that EstimateToNthPower >= Value
  if EstimateToNthPower = Radicand then            // Answer is a power of two.
    Exit(Estimate);

   // Estimate is highest power of two such that Estimate ^ Nth < Value
  Estimate := Estimate shr 1;
  AdditionalBit := Estimate.BitLength - 2;
  if AdditionalBit < 0 then
    Exit(Estimate);
  EstimateToNthPower := BigInteger.Pow(Estimate, Nth);

  // We already have the top bit. Now repeatedly add decreasingly significant bits and check if
  // that addition is not too much. If so, remove the bit again.
  while Radicand > EstimateToNthPower do
  begin
    Estimate := Estimate.SetBit(AdditionalBit);
    NewEstimateToNthPower := BigInteger.Pow(Estimate, Nth);
    if NewEstimateToNthPower > Radicand then                        // Did we add too much? If so, remove bit.
      Estimate := Estimate.ClearBit(AdditionalBit)
    else
      EstimateToNthPower := NewEstimateToNthPower;               // Otherwise update EstimateToNthPower (= Estimate^Nth).
    Dec(AdditionalBit);
    if AdditionalBit < 0 then
      Break;
  end;

  // All bits covered, so we have our result.
  Result := Estimate;
end;

class procedure BigInteger.NthRootRemainder(const Radicand: BigInteger; Nth: Integer; var Root, Remainder: BigInteger);
begin
  Root := NthRoot(Radicand, Nth);
  Remainder := Radicand - Pow(Root, Nth);
end;

class function BigInteger.Sqr(const Value: BigInteger): BigInteger;
begin
  if (Value.FSize and SizeMask) < KaratsubaSqrThreshold then
    Result := Value * Value
  else
    Result := SqrKaratsuba(Value);
end;

class function BigInteger.Sqrt(const Radicand: BigInteger): BigInteger;
var
  Estimate: BigInteger;
  AdditionalBit: Integer;
  EstimateSquared: BigInteger;
  Temp: BigInteger;
begin
  if Radicand = BigInteger.One then
    Exit(BigInteger.One);

  if Radicand.IsNegative then
    raise EInvalidOp.Create(SSqrtBigInteger);

  Estimate := BigInteger.One shl ((Radicand.BitLength) shr 1);

  EstimateSquared := Sqr(Estimate);
  while EstimateSquared < Radicand do
  begin
    Estimate := Estimate shl 1;
    EstimateSquared := EstimateSquared shl 2;
  end;

  if EstimateSquared = Radicand then
    Exit(Estimate);

  Estimate := Estimate shr 1;
  EstimateSquared := EstimateSquared shr 2;
  AdditionalBit := Estimate.BitLength - 2;
  if AdditionalBit < 0 then
    Exit(Estimate);

  while Radicand > EstimateSquared do
  begin

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Instead of multiplying two large BigIntegers, i.e. (Estimate + 2 ^ AdditionalBit) ^ 2, we try to be clever:  //
    // If A = Estimate; B = 2 ^ AdditionalBit then:                                                                 //
    // sqr(A + B) = A*A + 2*A*B + B*B = sqrA + (A * 2 + B)*B, so                                                    //
    // Temp := EstimateSquared + (Estimate * 2 + 2 ^ AdditionalBit) * 2 ^ AdditionalBit                             //
    // Since 2 ^ AdditionalBit and the multiplication can be done with shifts, we finally get the following.        //
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    Temp := Estimate shl 1;
    Temp := Temp.SetBit(AdditionalBit);
    Temp := Temp shl AdditionalBit + EstimateSquared;
    if Temp <= Radicand then
    begin
      ShallowCopy(Temp, EstimateSquared);
      Estimate := Estimate.SetBit(AdditionalBit);
    end;
    Dec(AdditionalBit);
    if AdditionalBit < 0 then
      Break;
  end;
  Result := Estimate;
end;

class procedure BigInteger.SqrtRemainder(const Radicand: BigInteger; var Root, Remainder: BigInteger);
begin
  Root := Sqrt(Radicand);
  Remainder := Radicand - Root * Root;
end;

class procedure BigInteger.DivThreeHalvesByTwo(const LeftUpperMid, LeftLower, Right, RightUpper, RightLower: BigInteger;
  N: Integer; var Quotient, Remainder: BigInteger);
var
  Q, R: BigInteger;
begin
  Q := BigInteger.Zero;
  R := BigInteger.Zero;
  if (LeftUpperMid shr N) = RightUpper then
  begin
    Q := (BigInteger.One shl N) - BigInteger.One;
    R := LeftUpperMid - (RightUpper shl N) + RightUpper;
  end
  else
    DivTwoDigitsByOne(LeftUpperMid, RightUpper, N, Q, R);

  Quotient := Q;
  Remainder := ((R shl N) or LeftLower) - Q * RightLower;
  while Remainder < 0 do
  begin
    Dec(Quotient);
    Remainder := Remainder + Right;
  end;
end;

class procedure BigInteger.DivTwoDigitsByOne(const Left, Right: BigInteger; N: Integer; var Quotient, Remainder: BigInteger);
var
  NIsOdd: Boolean;
  LeftCopy, RightCopy: BigInteger;
  HalfN: Integer;
  HalfMask: BigInteger;
  RightUpper, RightLower: BigInteger;
  QuotientUpper, QuotientLower: BigInteger;
  Quot, Rem: BigInteger;
begin
  Quot := BigInteger.Zero;
  Rem := BigInteger.Zero;
  if N <= BigInteger.BurnikelZieglerThreshold * 32 then
  begin
    BigInteger.DivModKnuth(Left, Right, Quot, Rem);
    Quotient := Quot;
    Remainder := Rem;
    Exit;
  end;

  NIsOdd := Odd(N);
  if NIsOdd then
  begin
    LeftCopy := Left shl 1;
    RightCopy := Right shl 1;
    Inc(N);
  end
  else
  begin
    LeftCopy := Left;
    RightCopy := Right;
  end;
  HalfN := N shr 1;
  HalfMask := (BigInteger.One shl HalfN) - BigInteger.One;

  RightUpper := RightCopy shr HalfN;
  RightLower := RightCopy and HalfMask;

  DivThreeHalvesByTwo(LeftCopy shr N, (LeftCopy shr HalfN) and HalfMask, RightCopy, RightUpper, RightLower, HalfN, QuotientUpper, Rem);
  DivThreeHalvesByTwo(Rem, LeftCopy and HalfMask, RightCopy, RightUpper, RightLower, HalfN, QuotientLower, Rem);

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  //                                                                                                                 //
  //  Grade school division, but with (very) large digits, dividing [a1,a2,a3,a4] by [b1,b2]:                        //
  //                                                                                                                 //
  //    +----+----+----+----+     +----+----+     +----+                                                             //
  //    | a1 | a2 | a3 | a4 |  /  | b1 | b2 |  =  | q1 |        DivideThreeHalvesByTwo(a1a2, a3, b1b2, n, q1, r1r2)  //
  //    +----+----+----+----+     +----+----+     +----+                                                             //
  //    +--------------+  |                          |                                                               //
  //    |   b1b2 * q1  |  |                          |                                                               //
  //    +--------------+  |                          |                                                               //
  //  - ================  v                          |                                                               //
  //         +----+----+----+     +----+----+        | +----+                                                        //
  //         | r1 | r2 | a4 |  /  | b1 | b2 |  =       | q2 |   DivideThreeHalvesByTwo(r1r2, a4, b1b2, n, q1, r1r2)  //
  //         +----+----+----+     +----+----+        | +----+                                                        //
  //         +--------------+                        |    |                                                          //
  //         |   b1b2 * q2  |                        |    |                                                          //
  //         +--------------+                        |    |                                                          //
  //       - ================                        v    v                                                          //
  //              +----+----+                     +----+----+                                                        //
  //              | r1 | r2 |                     | q1 | q2 |   r1r2 = a1a2a3a4 mod b1b2, q1q2 = a1a2a3a4 div b1b2   //
  //              +----+----+                     +----+----+ ,                                                      //
  //                                                                                                                 //
  //  Note: in the diagram above, a1, b1, q1, r1 etc. are the most significant "digits" of their numbers.            //
  //                                                                                                                 //
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  if NIsOdd then
    Rem := Rem shr 1;
  Remainder := Rem;
  Quotient := (QuotientUpper shl HalfN) or QuotientLower;
end;

class procedure BigInteger.InternalDivModBurnikelZiegler(const Left, Right: BigInteger; var Quotient, Remainder: BigInteger);
var
  LCopy: BigInteger;
  N: Integer;
  DigitMask: BigInteger;
  LeftDigits: array of BigInteger;
  LeftDigitsIndex : Integer;
  QuotientDigit: BigInteger;
begin
  LCopy := Left;
  N := Right.BitLength;
  DigitMask := (BigInteger.One shl N) - BigInteger.One;
  LeftDigitsIndex := 0;

   while not LCopy.IsZero do begin
      if LeftDigitsIndex=Length(LeftDigits) then
         SetLength(LeftDigits, LeftDigitsIndex+8);
      LeftDigits[LeftDigitsIndex] := LCopy and DigitMask;
      Inc(LeftDigitsIndex);
      LCopy := LCopy shr N;
   end;

   if (LeftDigitsIndex > 0) and (LeftDigits[LeftDigitsIndex-1] >= Right) then
      Remainder := BigInteger.Zero
   else begin
      Remainder := LeftDigits[LeftDigitsIndex-1];
      Dec(LeftDigitsIndex);
   end;

   QuotientDigit := BigInteger.Zero;
   Quotient := BigInteger.Zero;
   while LeftDigitsIndex > 0 do
   begin
      DivTwoDigitsByOne((Remainder shl N) + LeftDigits[LeftDigitsIndex-1], Right, N, QuotientDigit, Remainder);
      Dec(LeftDigitsIndex);
      Quotient := (Quotient shl N) + QuotientDigit;
   end;
end;

class procedure BigInteger.DivModBurnikelZiegler(const Left, Right: BigInteger; var Quotient, Remainder: BigInteger);
var
  Q, R: BigInteger;
begin
  if Right.IsZero then
    raise Exception.Create('Division by zero')
  else if Right.IsNegative then
  begin
    DivModBurnikelZiegler(-Left, -Right, Q, R);
    Quotient := Q;
    Remainder := -R;
    Exit;
  end
  else if Left.IsNegative then
  begin
    DivModBurnikelZiegler(not Left, Right, Q, R);
    Quotient := not Q;
    Remainder := Right + not R;
    Exit;
  end
  else if Left.IsZero then
  begin
    Quotient := BigInteger.Zero;
    Remainder := BigInteger.Zero;
    Exit;
  end
  else
  begin
    InternalDivModBurnikelZiegler(Left, Right, Q, R);
    Quotient := Q;
    Remainder := R;
    Exit;
  end;
end;

end.
